diff --git a/package.json b/package.json index 7deec33..58e6693 100644 --- a/package.json +++ b/package.json @@ -84,6 +84,7 @@ "delete-user": "yarn run-script scripts/delete-user.js", "export-users-to-auth0": "yarn run-script scripts/export-users-to-auth0.js", "validate-owls-data": "yarn run-script scripts/validate-owls-data.js", + "archive:create": "yarn archive:create:list-urls && yarn archive:create:download-urls && yarn archive:create:upload", "archive:create:list-urls": "yarn run-script scripts/archive/create/list-urls.js", "archive:create:download-urls": "dotenv -- ./scripts/archive/create/download-urls.sh", "aws": "AWS_ACCESS_KEY_ID=$(dotenv -p ARCHIVE_STORAGE_READWRITE_ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(dotenv -p ARCHIVE_STORAGE_READWRITE_SECRET_KEY) aws --endpoint=https://$(dotenv -p ARCHIVE_STORAGE_HOST)", diff --git a/scripts/archive/create/download-urls.sh b/scripts/archive/create/download-urls.sh index 12297bb..e1ef541 100755 --- a/scripts/archive/create/download-urls.sh +++ b/scripts/archive/create/download-urls.sh @@ -1,2 +1,15 @@ echo 'Starting! (Note: If many of the URLs are already downloaded, it will take some time for wget to quietly check them all and find the new ones.)' -xargs --arg-file=$(dirname $0)/urls-cache.txt -P 8 wget --directory-prefix=${ARCHIVE_DIR=$(dirname $0)} --force-directories --no-clobber --timeout=10 --retry-connrefused --retry-on-host-error --no-cookies --compression=auto --https-only --no-verbose \ No newline at end of file +xargs --arg-file=$(dirname $0)/urls-cache.txt -P 8 wget --directory-prefix=${ARCHIVE_DIR=$(dirname $0)} --force-directories --no-clobber --timeout=10 --retry-connrefused --retry-on-host-error --no-cookies --compression=auto --https-only --no-verbose + +# It's expected that xargs will exit with code 123 if wget failed to load some +# of the URLs. So, if it exited with 123, exit this script with 0 (success). +# Otherwise, exit with the code that xargs exited with. +# (It would be nice if we could tell wget or xargs that a 404 isn't a failure? +# And have them succeed instead? But I couldn't find a way to do that!) +XARGS_EXIT_CODE=$? +if [ $XARGS_EXIT_CODE -eq 123 ] +then + exit 0 +else + exit $XARGS_EXIT_CODE +fi diff --git a/scripts/archive/create/list-urls.js b/scripts/archive/create/list-urls.js index afcf399..e55f467 100644 --- a/scripts/archive/create/list-urls.js +++ b/scripts/archive/create/list-urls.js @@ -29,15 +29,6 @@ const connectToDb = require("../../../src/server/db"); const { normalizeRow } = require("../../../src/server/util"); async function main() { - const urlsCacheFileAlreadyExists = await checkIfUrlsCacheFileAlreadyExists(); - if (urlsCacheFileAlreadyExists) { - console.error( - `urls-cache.txt already exists. Please remove it first if you really ` + - `want to rebuild it from scratch!` - ); - return 1; - } - const db = await connectToDb(); const file = await createUrlsCacheFile(); @@ -83,16 +74,6 @@ async function main() { console.info(`Done writing asset URLs.`); } -async function checkIfUrlsCacheFileAlreadyExists() { - const urlsCacheFilePath = path.join(__dirname, "urls-cache.txt"); - try { - await fs.access(urlsCacheFilePath, fs.constants.R_OK); - } catch (error) { - return false; - } - return true; -} - async function createUrlsCacheFile() { const urlsCacheFilePath = path.join(__dirname, "urls-cache.txt"); return await fs.open(urlsCacheFilePath, "w");