diff --git a/package.json b/package.json index b8c9f25..41b2c15 100644 --- a/package.json +++ b/package.json @@ -85,15 +85,17 @@ "export-users-to-auth0": "yarn run-script scripts/export-users-to-auth0.js", "model-needed-items": "yarn run-script scripts/model-needed-items.js", "validate-owls-data": "yarn run-script scripts/validate-owls-data.js", - "archive:create": "yarn archive:create:list-urls && yarn archive:create:download-urls && yarn archive:create:upload", - "archive:create:list-urls": "yarn run-script scripts/archive/create/list-urls.js", - "archive:create:download-urls": "dotenv -- ./scripts/archive/create/download-urls.sh", "aws": "AWS_ACCESS_KEY_ID=$(dotenv -p ARCHIVE_STORAGE_READWRITE_ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(dotenv -p ARCHIVE_STORAGE_READWRITE_SECRET_KEY) aws --endpoint=https://$(dotenv -p ARCHIVE_STORAGE_HOST)", - "archive:create:upload": "yarn aws s3 sync $(dotenv -p ARCHIVE_DIR) s3://$(dotenv -p ARCHIVE_STORAGE_BUCKET)", - "archive:create:upload-test": "yarn aws s3 sync $(dotenv -p ARCHIVE_DIR)/images.neopets.com/cp/items/data/000/000/000/1_8422bedbf9/ s3://$(dotenv -p ARCHIVE_STORAGE_BUCKET)/images.neopets.com/cp/items/data/000/000/000/1_8422bedbf9/", - "archive:create:read-backup-state": "dotenv -- ./scripts/archive/create/read-backup-state.sh", - "archive:create:compute-backup-delta": "dotenv -- ./scripts/archive/create/compute-backup-delta.sh", - "archive:create:download-urls-delta": "dotenv -- ./scripts/archive/create/download-urls-delta.sh" + "archive:full": "yarn archive:prepare:full && yarn archive:create:full && yarn archive:upload:full", + "archive:delta": "yarn archive:prepare:delta && yarn archive:create:delta && yarn archive:upload:delta", + "archive:prepare:full": "yarn run-script scripts/archive/prepare/full.js", + "archive:prepare:remote": "dotenv -- ./scripts/archive/prepare/remote.sh", + "archive:prepare:delta": "dotenv -- ./scripts/archive/prepare/delta.sh", + "archive:create:full": "dotenv -- ./scripts/archive/create/full.sh", + "archive:create:delta": "dotenv -- ./scripts/archive/create/delta.sh", + "archive:upload:full": "dotenv -- ./scripts/archive/upload/full.sh", + "archive:upload:delta": "dotenv -- ./scripts/archive/upload/delta.sh", + "archive:upload:test": "dotenv -- ./scripts/archive/upload/test.sh" }, "browserslist": { "production": [ diff --git a/scripts/archive/.gitignore b/scripts/archive/.gitignore new file mode 100644 index 0000000..2d98271 --- /dev/null +++ b/scripts/archive/.gitignore @@ -0,0 +1,5 @@ +/manifest-full.txt +/manifest-full.sorted.txt +/manifest-remote.txt +/manifest-remote.sorted.txt +/manifest-delta.txt \ No newline at end of file diff --git a/scripts/archive/create/.gitignore b/scripts/archive/create/.gitignore deleted file mode 100644 index 8b794ed..0000000 --- a/scripts/archive/create/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -/urls-cache.txt -/urls-cache.sorted.txt -/urls-cache-backup.txt -/urls-cache-backup.sorted.txt -/urls-cache-delta.txt \ No newline at end of file diff --git a/scripts/archive/create/compute-backup-delta.sh b/scripts/archive/create/compute-backup-delta.sh deleted file mode 100755 index c30d244..0000000 --- a/scripts/archive/create/compute-backup-delta.sh +++ /dev/null @@ -1,24 +0,0 @@ -# Sort urls-cache-backup.txt (what we already have backed up). -cat $(dirname $0)/urls-cache-backup.txt \ - | \ - sort \ - | \ - uniq - $(dirname $0)/urls-cache-backup.sorted.txt \ - && \ - # Sort urls-cache.txt (what's available on images.neopets.com). - cat $(dirname $0)/urls-cache.txt \ - | \ - sort \ - | \ - uniq - $(dirname $0)/urls-cache.sorted.txt \ - && \ - # Compute the diff between these two files, filtering to lines that start - # with "> ", meaning it's in urls-cache.txt but not in urls-cache-backup.txt. - diff $(dirname $0)/urls-cache-backup.sorted.txt $(dirname $0)/urls-cache.sorted.txt \ - | \ - grep '^>' \ - | \ - sed 's/^>\s*//' \ - | \ - # Output to urls-cache-delta.txt, and to the screen. - tee $(dirname $0)/urls-cache-delta.txt \ No newline at end of file diff --git a/scripts/archive/create/delta.sh b/scripts/archive/create/delta.sh new file mode 100755 index 0000000..b0ab090 --- /dev/null +++ b/scripts/archive/create/delta.sh @@ -0,0 +1 @@ +MANIFEST=$(dirname $0)/../manifest-delta.txt ./download-urls.sh \ No newline at end of file diff --git a/scripts/archive/create/download-urls-delta.sh b/scripts/archive/create/download-urls-delta.sh deleted file mode 100755 index 063ece6..0000000 --- a/scripts/archive/create/download-urls-delta.sh +++ /dev/null @@ -1,3 +0,0 @@ -# Run archive:create:download-urls, but using our delta URLs file specifically. -URLS_CACHE=$(dirname $0)/urls-cache-delta.txt \ - yarn archive:create:download-urls \ No newline at end of file diff --git a/scripts/archive/create/download-urls.sh b/scripts/archive/create/download-urls.sh index aca861e..a336ece 100755 --- a/scripts/archive/create/download-urls.sh +++ b/scripts/archive/create/download-urls.sh @@ -1,5 +1,5 @@ echo 'Starting! (Note: If many of the URLs are already downloaded, it will take some time for wget to quietly check them all and find the new ones.)' -xargs --arg-file=${URLS_CACHE=$(dirname $0)/urls-cache.txt} -P 8 wget --directory-prefix=${ARCHIVE_DIR=$(dirname $0)} --force-directories --no-clobber --timeout=10 --retry-connrefused --retry-on-host-error --no-cookies --compression=auto --https-only --no-verbose +xargs --arg-file=$MANIFEST -P 8 wget --directory-prefix=${ARCHIVE_DIR=$(dirname $0)} --force-directories --no-clobber --timeout=10 --retry-connrefused --retry-on-host-error --no-cookies --compression=auto --https-only --no-verbose # It's expected that xargs will exit with code 123 if wget failed to load some # of the URLs. So, if it exited with 123, exit this script with 0 (success). diff --git a/scripts/archive/create/full.sh b/scripts/archive/create/full.sh new file mode 100755 index 0000000..21ad2fa --- /dev/null +++ b/scripts/archive/create/full.sh @@ -0,0 +1 @@ +MANIFEST=$(dirname $0)/../manifest-full.txt ./download-urls.sh \ No newline at end of file diff --git a/scripts/archive/prepare/delta.sh b/scripts/archive/prepare/delta.sh new file mode 100755 index 0000000..10bb59b --- /dev/null +++ b/scripts/archive/prepare/delta.sh @@ -0,0 +1,30 @@ +# Prepare the full manifest of URLs. +yarn archive:prepare:full \ + && \ + # Prepare the manifest of URLs already present on the remote archive. + yarn archive:prepare:remote \ + && \ + # Sort manifest-remote.txt (what we already have backed up). + cat $(dirname $0)/../manifest-remote.txt \ + | \ + sort \ + | \ + uniq - $(dirname $0)/../manifest-remote.sorted.txt \ + && \ + # Sort manifest-full.txt (what's available on images.neopets.com). + cat $(dirname $0)/../manifest-full.txt \ + | \ + sort \ + | \ + uniq - $(dirname $0)/../manifest-full.sorted.txt \ + && \ + # Compute the diff between these two files, filtering to lines that start + # with "> ", meaning it's in manifest-full.txt but not in manifest-remote.txt. + diff $(dirname $0)/../manifest-remote.sorted.txt $(dirname $0)/../manifest-full.sorted.txt \ + | \ + grep '^>' \ + | \ + sed 's/^>\s*//' \ + | \ + # Output to manifest-delta.txt, and to the screen. + tee $(dirname $0)/../manifest-delta.txt \ No newline at end of file diff --git a/scripts/archive/create/list-urls.js b/scripts/archive/prepare/full.js similarity index 98% rename from scripts/archive/create/list-urls.js rename to scripts/archive/prepare/full.js index e55f467..6d9a21e 100644 --- a/scripts/archive/create/list-urls.js +++ b/scripts/archive/prepare/full.js @@ -1,5 +1,5 @@ /** - * archive:create:list-urls generates a urls-cache.txt file, containing all of + * archive:prepare:full generates a manifest-full.txt file, containing all of * the images.neopets.com URLs for customization that Dress to Impress is aware * of. This will enable us to back them all up in an archive! * @@ -75,7 +75,7 @@ async function main() { } async function createUrlsCacheFile() { - const urlsCacheFilePath = path.join(__dirname, "urls-cache.txt"); + const urlsCacheFilePath = path.join(__dirname, "../manifest-full.txt"); return await fs.open(urlsCacheFilePath, "w"); } diff --git a/scripts/archive/create/read-backup-state.sh b/scripts/archive/prepare/remote.sh similarity index 84% rename from scripts/archive/create/read-backup-state.sh rename to scripts/archive/prepare/remote.sh index 7b88bb0..f267811 100755 --- a/scripts/archive/create/read-backup-state.sh +++ b/scripts/archive/prepare/remote.sh @@ -10,5 +10,5 @@ yarn aws s3 ls --recursive s3://dti-archive/ \ # Hacky urlencode; the only % value in URLs list today is %20, so... sed -E 's/ /%20/' \ | \ - # Output to urls-cache-backup.txt, and print to the screen. - tee $(dirname $0)/urls-cache-backup.txt \ No newline at end of file + # Output to manifest-remote.txt, and print to the screen. + tee $(dirname $0)/../manifest-remote.txt \ No newline at end of file diff --git a/scripts/archive/upload/delta.sh b/scripts/archive/upload/delta.sh new file mode 100755 index 0000000..2f08c1b --- /dev/null +++ b/scripts/archive/upload/delta.sh @@ -0,0 +1 @@ +echo 'archive:upload:delta -- TODO!' \ No newline at end of file diff --git a/scripts/archive/upload/full.sh b/scripts/archive/upload/full.sh new file mode 100755 index 0000000..d17d2d7 --- /dev/null +++ b/scripts/archive/upload/full.sh @@ -0,0 +1 @@ +yarn aws s3 sync $ARCHIVE_DIR s3://$ARCHIVE_STORAGE_BUCKET \ No newline at end of file diff --git a/scripts/archive/upload/test.sh b/scripts/archive/upload/test.sh new file mode 100755 index 0000000..32941ab --- /dev/null +++ b/scripts/archive/upload/test.sh @@ -0,0 +1 @@ +yarn aws s3 sync $ARCHIVE_DIR/images.neopets.com/cp/items/data/000/000/000/1_8422bedbf9/ s3://$ARCHIVE_STORAGE_BUCKET/images.neopets.com/cp/items/data/000/000/000/1_8422bedbf9/ \ No newline at end of file