diff --git a/Documentation/docs/contributing/content-link-upload.png b/Documentation/docs/contributing/content-link-upload.png deleted file mode 100644 index 32df15aa486..00000000000 Binary files a/Documentation/docs/contributing/content-link-upload.png and /dev/null differ diff --git a/Documentation/docs/contributing/data.md b/Documentation/docs/contributing/data.md index 81eb8dc4eda..adab0e5e701 100644 --- a/Documentation/docs/contributing/data.md +++ b/Documentation/docs/contributing/data.md @@ -11,7 +11,8 @@ generation) also applies to any other data contained in a text file that a test may require, if any. If you just want to browse and download the ITK testing images, browse the -[ITKData Datalad repository]. +[ITKTestingData repository]. Historical snapshots are also archived in +the [ITKData DataLad repository]. Setup ----- @@ -43,7 +44,33 @@ associated with these files. Generate the *.cid* content link from your test data file, *MyTest.png* in this example, with the [content-link-upload] web app. This app will upload the data to IPFS and provide a *.cid* CMake ExternalData content link file -to download. +to download. This is the easiest and recommended way to upload new test data. + +For command-line uploads, run the Python helper at +`Utilities/Maintenance/ExternalDataUpload/upload.py` from the +`external-data-upload` pixi environment: + +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/upload.py \ + Modules/.../test/Baseline/MyTest.png +``` + +The helper packs the file into a CARv1 with `npx ipfs-car` (defaults +match the unixfs-v1-2025 / IPIP-0499 profile so CIDs are reproducible), +uploads the CAR to your [Filebase] IPFS bucket via Filebase's S3-compatible +REST API, verifies the CID Filebase reports back matches what was computed +locally, and replaces the original file with `MyTest.png.cid` containing +that CID. The CID and source-tree path are also recorded in +`Testing/Data/content-links.manifest`. 
A local IPFS daemon is **not** +required. + +First-time CLI users must complete the one-time pixi + Filebase setup +documented in +[`Utilities/Maintenance/ExternalDataUpload/README.md`] before the helper +will succeed. Contributors who prefer not to run any local tooling can +instead use the [content-link-upload] web app, which returns a `.cid` +file directly — manifest and mirror updates must then be added by hand. For more details, see the description and procedures in [Upload Binary Data]. @@ -142,5 +169,8 @@ the [InterPlanetary File System (IPFS)]. [CMake ExternalData: Using Large Files with Distributed Version Control]: https://blog.kitware.com/cmake-externaldata-using-large-files-with-distributed-version-control/ [content-link-upload]: https://content-link-upload.itk.org [InterPlanetary File System (IPFS)]: https://ipfs.tech/ -[ITKData Datalad repository]: https://gin.g-node.org/InsightSoftwareConsortium/ITKData/src/main +[ITKData DataLad repository]: https://gin.g-node.org/InsightSoftwareConsortium/ITKData/src/main +[ITKTestingData repository]: https://github.com/InsightSoftwareConsortium/ITKTestingData [Upload Binary Data]: upload_binary_data.md +[`Utilities/Maintenance/ExternalDataUpload/README.md`]: https://github.com/InsightSoftwareConsortium/ITK/blob/main/Utilities/Maintenance/ExternalDataUpload/README.md +[Filebase]: https://filebase.com/ diff --git a/Documentation/docs/contributing/upload_binary_data.md b/Documentation/docs/contributing/upload_binary_data.md index 905bac11932..fa08fe7978b 100644 --- a/Documentation/docs/contributing/upload_binary_data.md +++ b/Documentation/docs/contributing/upload_binary_data.md @@ -33,35 +33,47 @@ adopting Web3, we gain: - **Scalability** - **Sustainability** -Contributors to the ITK upload their data through a simple web app -that utilizes an easy-to-use, permissionless, free service, [web3.storage]. 
- -Data used in the ITK Git repository is periodically tracked in a -dedicated DataLad repository, the [ITKData DataLad repository]. -and stored across redundant locations so it can be retrieved from any of -the following: - -- Local [IPFS](https://ipfs.io/) nodes -- Peer [IPFS](https://ipfs.io/) nodes -- [web3.storage](https://web3.storage/) -- [pinata.cloud](https://pinata.cloud) -- Kitware's IPFS Server -- [ITKTestingData](https://github.com/InsightSoftwareConsortium/ITKTestingData) GitHub Pages CDN +Contributors upload their data by running a small Python helper that packs +the file into a [CARv1] using `npx ipfs-car`, uploads the CAR to a [Filebase] +IPFS bucket through Filebase's S3-compatible REST API, records the resulting +CID in a manifest, and (optionally) mirrors the bytes into the [ITKTestingData] +GitHub Pages repository. A local [Kubo] daemon, IPFS Desktop, or any +`ipfs pin remote` PSA service is **not** required. See +[`Utilities/Maintenance/ExternalDataUpload/README.md`] for the one-time +developer setup and full workflow. + +[CARv1]: https://ipld.io/specs/transport/car/carv1/ + +Data referenced from the ITK Git repository is stored across redundant +locations so it can be retrieved from any of the following at build time: + +- [Filebase] IPFS gateway (where uploads land) +- [ITKTestingData] GitHub Pages mirror +- Public IPFS HTTP gateways (`ipfs.io`, `dweb.link`, `cloudflare-ipfs.com`, + `gateway.pinata.cloud`) +- Local [Kubo] gateway (typically `127.0.0.1:8080`) when present - Kitware's Apache HTTP Server -- Local testing data cache +- Local `ExternalData_OBJECT_STORES` cache - Archive tarballs from GitHub Releases +- Historical [ITKData DataLad repository] snapshots (older content links) ![ITK testing data figure](./itk-testing-data.png) -*Testing data workflow. Testing or example data is uploaded to IPFS via the content-link-upload.itk.org web app. -This pins the data on multiple servers across the globe. 
-At release time, the data is also pinned on multiple servers in the USA and France and community pinners. -At release time, the data is also stored in the DataLad Git repository, served on an Apache HTTP server, and the GitHub Pages CDN. -At test time an ITK build can pull the data from a local cache, archive tarball, the Apache HTTP server, GitHub Pages CDN, or multiple IPFS HTTP gateways.* - -See also our [Data](data.md) guide for more information. If you just -want to browse and download the ITK testing images, see the -[ITKData DataLad repository]. +*Testing data workflow. New content is added with the +`Utilities/Maintenance/ExternalDataUpload/upload.py` helper, which packs +the file into a CAR with `npx ipfs-car` (defaults match the +unixfs-v1-2025 / IPIP-0499 profile so CIDs are reproducible) and uploads +the CAR to a [Filebase] IPFS bucket via boto3 against Filebase's +S3-compatible API. The CID Filebase reports back from `head_object` is +verified against the locally computed CID, written as a `.cid` content +link in the ITK source tree, and recorded in +`Testing/Data/content-links.manifest`. Files ≤ 50 MB can additionally be +mirrored into [ITKTestingData] for GitHub Pages CDN delivery. At test +time an ITK build can fetch the data from a local cache, archive tarball, +the Apache HTTP server, the GitHub Pages mirror, or any of several public +IPFS HTTP gateways.* + +See also our [Data](data.md) guide for more information. Adding images as input to ITK sources ------------------------------------- @@ -89,88 +101,119 @@ need to be followed: Upload new testing data ----------------------- -### Prerequisites +### One-time setup -[web3.storage] is a decentralized IPFS storage -provider where any ITK community member can upload binary data files. -There are two primary methods available to upload data files: +The upload workflow needs: -A. The CMake ExternalData Web3 upload browser interface. -B. 
The w3 command line executable that - comes with the [@web3-storage/w3cli] Node.js NPM package. +- The `external-data-upload` pixi environment installed + (`pixi install -e external-data-upload`). It provides Python 3, [boto3], + and Node.js (which makes `npx ipfs-car` available without a separate + global install). +- A [Filebase] IPFS bucket and an S3 access key for that bucket. Filebase's + free tier is sufficient — the upload uses the S3 import-as-CAR path, + not the legacy IPFS Pinning Service API. +- The credentials exported as environment variables before running the + helper: -Once files have been uploaded, they will be publicly -available and accessible since data is content addressed on the IPFS -peer-to-peer network. +```bash +export FILEBASE_ACCESS_KEY=... +export FILEBASE_SECRET_KEY=... +export FILEBASE_BUCKET=itk-data +``` -In addition to these two methods, documented in detail below, another -possibility includes pinning the data on IPFS with [other pinning services] -and creating the content link file manually. The content link file is simply a -plan text file with a `.cid` extension whose contents are the CID file. -However, the documented two methods are recommended due to their simplicity -and in order to keep CID values consistent. +The full step-by-step setup is documented in +[`Utilities/Maintenance/ExternalDataUpload/README.md`]. Complete that +one-time setup before proceeding. -At release time, the release manager uploads and archives repository data -references in other storage locations for additional redundancy. +[boto3]: https://boto3.amazonaws.com/ -### Option A) Upload Via the Web Interface +### Upload a file -Use the [Content Link Upload] -tool ([Alt Link]) to -upload your data to the [IPFS] and download the -corresponding CMake content link file. 
+From the ITK source tree, run the upload helper with the path to the file +you want to upload: -![[CMake ExternalData Web3 -Content Link Upload](https://content-link-upload.itk.org/)](./content-link-upload.png) +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/upload.py \ + Modules/.../test/Baseline/MyTest.png +``` -### Option B) Upload Via CMake and Node.js CLI +The helper will: -Install the w3 CLI with the -[@web3-storage/w3cli] [Node.js] package: +1. Pack the file into a CARv1 with `npx ipfs-car pack --no-wrap` — + ipfs-car v1+ defaults to 1 MiB chunks, 1024 children per node, raw + leaves, CIDv1, which is the unixfs-v1-2025 profile, so the CID is + reproducible across implementations. +2. PUT the CAR to your Filebase IPFS bucket with + `x-amz-meta-import: car` so Filebase imports it server-side, then + read the imported CID back via `head_object` and verify it matches + the locally computed CID. +3. Replace `MyTest.png` in the source tree with `MyTest.png.cid` — a + one-line text file containing the CID. +4. Append the CID and source-tree path to + `Testing/Data/content-links.manifest`. +5. Print the `git rm` / `git add` commands needed to stage the change. -```bash -npm install -g @web3-storage/w3cli -``` +### Mirror to ITKTestingData (optional but recommended) -Login in with your credentials. +Pass `--testing-data-repo ` to additionally copy the file into a +local clone of [ITKTestingData] at `CID/`: ```bash -w3 login +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/upload.py \ + --testing-data-repo ~/src/ITKTestingData \ + Modules/.../test/Baseline/MyTest.png ``` -Create an w3externaldata bash/zsh -function: +This populates the GitHub Pages mirror gateway +(`https://insightsoftwareconsortium.github.io/ITKTestingData/CID/`) +already listed in [`CMake/ITKExternalData.cmake`]. Commit and push in +the `ITKTestingData` repo to publish. 
Files larger than **50 MB** are +skipped for the mirror step only (GitHub rejects pushes containing +files over 50 MB per file) — the Filebase upload still proceeds for +those files. -```bash -function w3externaldata() { w3 put $1 --no-wrap | tail -n 1 | awk -F "/ipfs/" '{print $2}' | tee $1.cid } -``` +### Alternative: upload via the web app + +Contributors who prefer not to run any local tooling can upload a file +through the [Content Link Upload] web app ([Alt Link]). The app pins the +file and returns the corresponding `.cid` content link to download. The +resulting CID is usable anywhere the helper-produced CID would be — but +the manifest entry and the optional [ITKTestingData] mirror must then be +added by hand. The helper above is preferred when available because it +also updates `Testing/Data/content-links.manifest` in one step. -Call the function with the file to be uploaded. This command will -generate the \.cid content -link: +### Normalize existing content links + +Older `.md5` / `.sha256` / `.sha512` content links can be converted to +`.cid`, and existing `.cid` links can be regenerated under the +unixfs-v1-2025 profile, with: ```bash -w3externaldata - 1 file (0.3MB) -⁂ Stored 1 file -bafkreifpfhcc3gc7zo2ds3ktyyl5qrycwisyaolegp47cl27i4swxpa2ey +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/normalize.py ``` -### Add the content link to the source tree +See [`Utilities/Maintenance/ExternalDataUpload/README.md`] for the full +set of options (`--dry-run`, `--hash-only`, `--cid-only`, +`--testing-data-repo`, `--bucket`). -Add the file to the repository in the directory referenced by the -*CMakeLists.txt* script. Move the content link file to the **source tree** at -the location where the actual file is desired in the build tree. 
+### Add the content link to the source tree -Stage the new file to your commit: +The upload helper prints the exact commands to stage: ```bash -git add -- path/to/file.cid +git rm path/to/MyTest.png +git add path/to/MyTest.png.cid +git add Testing/Data/content-links.manifest +git commit ``` -Next time CMake configuration runs, it will find the new content link. During -the next project build, the data file corresponding to the content link will -be downloaded into the build tree. +Next time CMake configuration runs, it will find the new content link. +During the next project build, the data file corresponding to the +content link will be downloaded into the build tree from the first +reachable gateway in [`CMake/ITKExternalData.cmake`]. [Alt Link]: https://content-link-upload.itk.eth.limo [Analyze format]: http://www.grahamwideman.com/gw/brain/analyze/formatdoc.htm @@ -178,18 +221,16 @@ be downloaded into the build tree. [Content Link Upload]: https://content-link-upload.itk.org [CONTRIBUTING.md]: ../CONTRIBUTING.md [CMake]: https://cmake.org/ +[`CMake/ITKExternalData.cmake`]: https://github.com/InsightSoftwareConsortium/ITK/blob/main/CMake/ITKExternalData.cmake +[Filebase]: https://filebase.com/ [Git]: https://git-scm.com/ [IPFS]: https://ipfs.io/ -[ITKData Datalad repository]: https://gin.g-node.org/InsightSoftwareConsortium/ITKData/src/main [ITK community]: https://discourse.itk.org/ [ITK Sphinx Examples]: https://itk.org/ITKExamples/index.html [ITK Software Guide]: https://itk.org/ItkSoftwareGuide.pdf +[ITKData DataLad repository]: https://gin.g-node.org/InsightSoftwareConsortium/ITKData/src/main [ITKTestingData]: https://github.com/InsightSoftwareConsortium/ITKTestingData -[MD5 hash]: https://en.wikipedia.org/wiki/MD5 +[Kubo]: https://github.com/ipfs/kubo [multiformats]: https://multiformats.io/ -[Node.js]: https://nodejs.org/ -[other pinning services]: https://docs.ipfs.tech/how-to/work-with-pinning-services/ -[SHA512 hash]: 
https://en.wikipedia.org/wiki/SHA-2 [solution to this problem]: https://blog.kitware.com/cmake-externaldata-using-large-files-with-distributed-version-control/ -[web3.storage]: https://web3.storage/ -[@web3-storage/w3cli]: https://www.npmjs.com/package/@web3-storage/w3cli +[`Utilities/Maintenance/ExternalDataUpload/README.md`]: https://github.com/InsightSoftwareConsortium/ITK/blob/main/Utilities/Maintenance/ExternalDataUpload/README.md diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Cos3D_cCED.vtk.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Cos3D_cCED.vtk.cid new file mode 100644 index 00000000000..e493e31b03b --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Cos3D_cCED.vtk.cid @@ -0,0 +1 @@ +bafkreifmtmpjuppizngftzcnt3ilufa66dajy3i6xogn3jfirveqw63cwu diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Cos3D_cCED.vtk.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Cos3D_cCED.vtk.md5 deleted file mode 100644 index 3df854dded2..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Cos3D_cCED.vtk.md5 +++ /dev/null @@ -1 +0,0 @@ -df95fdb0657f7f8472bdc16c73c5bed0 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_I_20.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_I_20.png.cid new file mode 100644 index 00000000000..45edb5677d4 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_I_20.png.cid @@ -0,0 +1 @@ +bafkreif4yuyueovggfvnjj3qrnct54nrm52pfconktlvavzq7kt64jo3ji diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_I_20.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_I_20.png.md5 deleted file mode 100644 index 3b5dbe8354c..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_I_20.png.md5 +++ /dev/null @@ -1 +0,0 @@ 
-e4e5e233b434ea4c85059d7c62f15554 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cCED_20.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cCED_20.png.cid new file mode 100644 index 00000000000..e3ec6f7a548 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cCED_20.png.cid @@ -0,0 +1 @@ +bafkreicjpqxemmg3lgvigudscqqaiy3mjrgm3tm3ep7ngskw3is3fix2qi diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cCED_20.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cCED_20.png.md5 deleted file mode 100644 index 750c29db86b..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cCED_20.png.md5 +++ /dev/null @@ -1 +0,0 @@ -07435f1d44aeb66fd98e642945437662 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cEED_20.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cEED_20.png.cid new file mode 100644 index 00000000000..fae7f8f2427 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cEED_20.png.cid @@ -0,0 +1 @@ +bafkreigxse36jyrc4cs6w77vi6qfd223dhnhrf6v2yuipxcl2wq2h2xjky diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cEED_20.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cEED_20.png.md5 deleted file mode 100644 index 76c2e516e80..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cEED_20.png.md5 +++ /dev/null @@ -1 +0,0 @@ -5e752e3fa0e46bb530fedc94c7794c73 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_I_2.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_I_2.png.cid new file mode 100644 index 00000000000..a82e5548b05 --- /dev/null +++ 
b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_I_2.png.cid @@ -0,0 +1 @@ +bafkreigh7wfs6kgfhqwxcnbt22c4panshoarkda5tdmykstolnwzvkw26m diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_I_2.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_I_2.png.md5 deleted file mode 100644 index b80ed7eb732..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_I_2.png.md5 +++ /dev/null @@ -1 +0,0 @@ -ff36663855e6794712b081689aac70e5 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cCED_2.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cCED_2.png.cid new file mode 100644 index 00000000000..9a9b0f36397 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cCED_2.png.cid @@ -0,0 +1 @@ +bafkreicbrlzmtr2t3a22hgudfnbicgmvikg2hioxzpzxvw5ncwzmdlwpeq diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cCED_2.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cCED_2.png.md5 deleted file mode 100644 index f6a2c0d79ae..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cCED_2.png.md5 +++ /dev/null @@ -1 +0,0 @@ -b741b80ce65e20c59f286244f621344a \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cEED_2.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cEED_2.png.cid new file mode 100644 index 00000000000..7b2b845692d --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cEED_2.png.cid @@ -0,0 +1 @@ +bafkreigcrzclz5tri2yhsu63lax4kxabjna652kayppzqp7u3hilwgadte diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cEED_2.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cEED_2.png.md5 deleted file 
mode 100644 index d529c7f1aa4..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cEED_2.png.md5 +++ /dev/null @@ -1 +0,0 @@ -c606fea9c82019d1b4e80d351b803d92 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_CED.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_CED.png.cid new file mode 100644 index 00000000000..bdd66c9283b --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_CED.png.cid @@ -0,0 +1 @@ +bafkreibpqafrxnmo2m2gixrkml6g54hvzgiyiyaafxug7zyameeulrpiua diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_CED.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_CED.png.md5 deleted file mode 100644 index b5cb7d21602..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_CED.png.md5 +++ /dev/null @@ -1 +0,0 @@ -50da37ff706c93536c0f33390da4287c \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_cCED.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_cCED.png.cid new file mode 100644 index 00000000000..4404e61996e --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_cCED.png.cid @@ -0,0 +1 @@ +bafkreihnab2o426g2ffmwj42a4wlrusk5fzev3jwxxjmgw3z4hgq3rosmu diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_cCED.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_cCED.png.md5 deleted file mode 100644 index 35dfbe815ba..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_cCED.png.md5 +++ /dev/null @@ -1 +0,0 @@ -21e83dc09f4c58a44eeb676e49ec3d99 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_I.png.cid 
b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_I.png.cid new file mode 100644 index 00000000000..54757b866fd --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_I.png.cid @@ -0,0 +1 @@ +bafkreiafl54ccpviaq4nm7vufz3wfceoxl5y2bosbvtkyb45btihrgt6ae diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_I.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_I.png.md5 deleted file mode 100644 index 6a1cf5d0156..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_I.png.md5 +++ /dev/null @@ -1 +0,0 @@ -c5b358267defea8babcfebbc66c9fa8b \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cCED.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cCED.png.cid new file mode 100644 index 00000000000..75fd56c7753 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cCED.png.cid @@ -0,0 +1 @@ +bafkreib2z57ja7aqbz4ddzpprpgxwj3vojaeaijbmeddnbdon65y52536u diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cCED.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cCED.png.md5 deleted file mode 100644 index 223ef256a52..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cCED.png.md5 +++ /dev/null @@ -1 +0,0 @@ -fd8d652016508d93ee861c1db83f3ed4 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cEED.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cEED.png.cid new file mode 100644 index 00000000000..4589307d9e9 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cEED.png.cid @@ -0,0 +1 @@ +bafkreihod4nsri7yzrd7h354snfk24xenhwaqy4rpt356vqcuqupwrcu3a diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cEED.png.md5 
b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cEED.png.md5 deleted file mode 100644 index a08c69480fd..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cEED.png.md5 +++ /dev/null @@ -1 +0,0 @@ -edf293e2cce2eae1df4f8598e8179641 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_EED.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_EED.png.cid new file mode 100644 index 00000000000..224854e8b33 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_EED.png.cid @@ -0,0 +1 @@ +bafkreiaslkugrrcu3wvgfvhyqyt4p4voolthjbrjlwwyhvppsmdb4sb2au diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_EED.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_EED.png.md5 deleted file mode 100644 index 09f2a9ab526..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_EED.png.md5 +++ /dev/null @@ -1 +0,0 @@ -f823b62e9135a37c7438fa07a9e54096 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_cEED.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_cEED.png.cid new file mode 100644 index 00000000000..5098a3e9e4a --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_cEED.png.cid @@ -0,0 +1 @@ +bafkreicwjq4f2xajppwq3kye2cc2mnkvn5dfqlc7uoexm7qbftya6sxbcm diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_cEED.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_cEED.png.md5 deleted file mode 100644 index ff3bfe4ac73..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_cEED.png.md5 +++ /dev/null @@ -1 +0,0 @@ -12473a0cb8d3afa0f8d7eb4f61e6216b \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/VectorField_Circle_cEED.vtk.cid 
b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/VectorField_Circle_cEED.vtk.cid new file mode 100644 index 00000000000..24ac0cfdf01 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/VectorField_Circle_cEED.vtk.cid @@ -0,0 +1 @@ +bafkreib2k7buke3tmpvugklmy56q2a466thyegmzn5jdhz4y4jm5bhwuiy diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/VectorField_Circle_cEED.vtk.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/VectorField_Circle_cEED.vtk.md5 deleted file mode 100644 index 77458bed992..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/VectorField_Circle_cEED.vtk.md5 +++ /dev/null @@ -1 +0,0 @@ -23c4495de1a746648418abc144972e92 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/mrbrain_cEED.vtk.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/mrbrain_cEED.vtk.cid new file mode 100644 index 00000000000..a741f6bcf1a --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/mrbrain_cEED.vtk.cid @@ -0,0 +1 @@ +bafybeiffbnw2lggwcdgjuanqhhmur7ntoc7f5wgypbm5cjab2bkjtkjnvy diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/mrbrain_cEED.vtk.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/mrbrain_cEED.vtk.md5 deleted file mode 100644 index 9e91116b83f..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/mrbrain_cEED.vtk.md5 +++ /dev/null @@ -1 +0,0 @@ -db41c262fba84a75eb399e1e154a5974 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Cos3D_Noisy.vtk.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Cos3D_Noisy.vtk.cid new file mode 100644 index 00000000000..5030d3f4b37 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Cos3D_Noisy.vtk.cid @@ -0,0 +1 @@ +bafkreiew5kj3pus2c3cis7t57cfiiekkzdhfgt44ygfoafsaebk2kvxike diff --git 
a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Cos3D_Noisy.vtk.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Cos3D_Noisy.vtk.md5 deleted file mode 100644 index c48093eec6b..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Cos3D_Noisy.vtk.md5 +++ /dev/null @@ -1 +0,0 @@ -3a7d9131a732794fcb4100909cd3fd1c \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/FingerPrint.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/FingerPrint.png.cid new file mode 100644 index 00000000000..5cbf09ebca5 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/FingerPrint.png.cid @@ -0,0 +1 @@ +bafkreig2pwvdimswvimhz43bmhrrcozyf233wn7txir5hx6jtykws5qzvm diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/FingerPrint.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/FingerPrint.png.md5 deleted file mode 100644 index 2992b7a556c..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/FingerPrint.png.md5 +++ /dev/null @@ -1 +0,0 @@ -ed7342b4598d44574b2714834b705cad \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Lena_Detail.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Lena_Detail.png.cid new file mode 100644 index 00000000000..0cd60fda78f --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Lena_Detail.png.cid @@ -0,0 +1 @@ +bafkreic5pnb5dbbpbo6fgjk3atkjzdafuqbtzpfxeq7dfc724zsjnggvcu diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Lena_Detail.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Lena_Detail.png.md5 deleted file mode 100644 index 2d8222d7362..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Lena_Detail.png.md5 +++ /dev/null @@ -1 +0,0 @@ -45a1845c6fa452c7465bebda5bbe9b0f \ No newline at end of file diff --git 
a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Oscillations_Noisy1.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Oscillations_Noisy1.png.cid new file mode 100644 index 00000000000..80613d5c325 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Oscillations_Noisy1.png.cid @@ -0,0 +1 @@ +bafkreiae437zhanhbgmn2oxy2xyzee3ux5s2eyek6hsgk5bzd7lhwz6yzm diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Oscillations_Noisy1.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Oscillations_Noisy1.png.md5 deleted file mode 100644 index 4797a352a96..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Oscillations_Noisy1.png.md5 +++ /dev/null @@ -1 +0,0 @@ -75818e9d765fb6838a8cf5845ac19b9a \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/PacMan.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/PacMan.png.cid new file mode 100644 index 00000000000..8569b10850a --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/PacMan.png.cid @@ -0,0 +1 @@ +bafkreibhkebc4kkb5ysuelc2kfllixc2xq3looqfkvrq6f3qx5a42cvm5q diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/PacMan.png.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/PacMan.png.md5 deleted file mode 100644 index 0abec1e6758..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/PacMan.png.md5 +++ /dev/null @@ -1 +0,0 @@ -d7955368c6f49cbb451d8901aa40add6 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Triangle.png.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Triangle.png.cid new file mode 100644 index 00000000000..5881f9b832c --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Triangle.png.cid @@ -0,0 +1 @@ +bafkreicey264ntq4ew4wnlyoy23cu5k3cmxwvlm7fg4r5skcbtwhsh5jfe diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Triangle.png.md5 
b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Triangle.png.md5 deleted file mode 100644 index c5594b80c04..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Triangle.png.md5 +++ /dev/null @@ -1 +0,0 @@ -bce40d3af4f491d728aaba8bb8c9ede9 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/VectorField_CircleOpposites.vtk.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/VectorField_CircleOpposites.vtk.cid new file mode 100644 index 00000000000..7ff05bad9e0 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/VectorField_CircleOpposites.vtk.cid @@ -0,0 +1 @@ +bafkreics5ulkrwki4epnu6l4umam3on7ovr5ao4yfva7f2odkkp25j26gm diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/VectorField_CircleOpposites.vtk.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/VectorField_CircleOpposites.vtk.md5 deleted file mode 100644 index e664c2ad407..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/VectorField_CircleOpposites.vtk.md5 +++ /dev/null @@ -1 +0,0 @@ -0a9e85b2b8dfadb4ab25b828a0f23852 \ No newline at end of file diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/mrbrain_noisy.vtk.cid b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/mrbrain_noisy.vtk.cid new file mode 100644 index 00000000000..ebd35daafb0 --- /dev/null +++ b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/mrbrain_noisy.vtk.cid @@ -0,0 +1 @@ +bafybeid6ongwkdpv3manmr4qpu22zybq4frslcqj3ysmkxc5tmps2aqnfu diff --git a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/mrbrain_noisy.vtk.md5 b/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/mrbrain_noisy.vtk.md5 deleted file mode 100644 index 2e105fba437..00000000000 --- a/Modules/Filtering/AnisotropicDiffusionLBR/test/Input/mrbrain_noisy.vtk.md5 +++ /dev/null @@ -1 +0,0 @@ -ff88f04e75dc478b283ecdf39d8d7687 \ No newline at end of file diff --git a/Testing/Data/README.md b/Testing/Data/README.md 
index 41c19ec53ec..275e0dff962 100644 --- a/Testing/Data/README.md +++ b/Testing/Data/README.md @@ -7,3 +7,24 @@ tests and hence ensure the health of the toolkit: * The `Baseline` directory contains valid images created by tests. Generated images are compared with these baseline images during regression testing. * The `Input` directory contains data files that are used by the tests. + +Adding test data +---------------- + +Test data is fetched at build time from content-addressed storage by +`CMake/ITKExternalData.cmake`. Large files are *not* committed to the ITK git +repository; instead, a small `.cid` (or `.md5` / `.sha256`) content-link file +is committed next to where the data is referenced. + +To add new test data, use the upload skill at +`Utilities/Maintenance/ExternalDataUpload/`: + +```bash +pixi run -e external-data-upload python Utilities/Maintenance/ExternalDataUpload/upload.py <path-to-data-file> +``` + +The script uploads the file to Filebase IPFS storage, which pins it +server-side, replaces the original with a `.cid` content-link, and records the +CID in `Testing/Data/content-links.manifest`. See the skill's `README.md` +for one-time setup and the full workflow, including the optional +`ITKTestingData` GitHub Pages mirror step. diff --git a/Testing/Data/content-links.manifest b/Testing/Data/content-links.manifest new file mode 100644 index 00000000000..62dd0827cb9 --- /dev/null +++ b/Testing/Data/content-links.manifest @@ -0,0 +1,38 @@ +# ITK content-link manifest +# +# One entry per line, format: <cid> <repo-relative-path> +# +# Maintained automatically by +# Utilities/Maintenance/ExternalDataUpload/upload.py +# which packs each file into a CARv1 (unixfs-v1-2025 profile) and uploads +# the CAR to a Filebase IPFS bucket via boto3. +# +# Paths must not contain whitespace (the manifest uses a single space as +# the field delimiter). Data lines are kept sorted by path; comment lines +# above the first data line are preserved on re-write.
+bafkreifmtmpjuppizngftzcnt3ilufa66dajy3i6xogn3jfirveqw63cwu Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Cos3D_cCED.vtk +bafkreif4yuyueovggfvnjj3qrnct54nrm52pfconktlvavzq7kt64jo3ji Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_I_20.png +bafkreicjpqxemmg3lgvigudscqqaiy3mjrgm3tm3ep7ngskw3is3fix2qi Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cCED_20.png +bafkreigxse36jyrc4cs6w77vi6qfd223dhnhrf6v2yuipxcl2wq2h2xjky Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/FingerPrint_cEED_20.png +bafkreigh7wfs6kgfhqwxcnbt22c4panshoarkda5tdmykstolnwzvkw26m Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_I_2.png +bafkreicbrlzmtr2t3a22hgudfnbicgmvikg2hioxzpzxvw5ncwzmdlwpeq Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cCED_2.png +bafkreigcrzclz5tri2yhsu63lax4kxabjna652kayppzqp7u3hilwgadte Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Lena_Detail_cEED_2.png +bafkreibpqafrxnmo2m2gixrkml6g54hvzgiyiyaafxug7zyameeulrpiua Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_CED.png +bafkreihnab2o426g2ffmwj42a4wlrusk5fzev3jwxxjmgw3z4hgq3rosmu Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Oscillations1_cCED.png +bafkreiafl54ccpviaq4nm7vufz3wfceoxl5y2bosbvtkyb45btihrgt6ae Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_I.png +bafkreib2z57ja7aqbz4ddzpprpgxwj3vojaeaijbmeddnbdon65y52536u Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cCED.png +bafkreihod4nsri7yzrd7h354snfk24xenhwaqy4rpt356vqcuqupwrcu3a Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/PacMan_cEED.png +bafkreiaslkugrrcu3wvgfvhyqyt4p4voolthjbrjlwwyhvppsmdb4sb2au Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_EED.png +bafkreicwjq4f2xajppwq3kye2cc2mnkvn5dfqlc7uoexm7qbftya6sxbcm Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/Triangle_cEED.png +bafkreib2k7buke3tmpvugklmy56q2a466thyegmzn5jdhz4y4jm5bhwuiy 
Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/VectorField_Circle_cEED.vtk +bafybeiffbnw2lggwcdgjuanqhhmur7ntoc7f5wgypbm5cjab2bkjtkjnvy Modules/Filtering/AnisotropicDiffusionLBR/test/Baseline/mrbrain_cEED.vtk +bafkreiew5kj3pus2c3cis7t57cfiiekkzdhfgt44ygfoafsaebk2kvxike Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Cos3D_Noisy.vtk +bafkreig2pwvdimswvimhz43bmhrrcozyf233wn7txir5hx6jtykws5qzvm Modules/Filtering/AnisotropicDiffusionLBR/test/Input/FingerPrint.png +bafkreic5pnb5dbbpbo6fgjk3atkjzdafuqbtzpfxeq7dfc724zsjnggvcu Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Lena_Detail.png +bafkreiae437zhanhbgmn2oxy2xyzee3ux5s2eyek6hsgk5bzd7lhwz6yzm Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Oscillations_Noisy1.png +bafkreibhkebc4kkb5ysuelc2kfllixc2xq3looqfkvrq6f3qx5a42cvm5q Modules/Filtering/AnisotropicDiffusionLBR/test/Input/PacMan.png +bafkreicey264ntq4ew4wnlyoy23cu5k3cmxwvlm7fg4r5skcbtwhsh5jfe Modules/Filtering/AnisotropicDiffusionLBR/test/Input/Triangle.png +bafkreics5ulkrwki4epnu6l4umam3on7ovr5ao4yfva7f2odkkp25j26gm Modules/Filtering/AnisotropicDiffusionLBR/test/Input/VectorField_CircleOpposites.vtk +bafybeid6ongwkdpv3manmr4qpu22zybq4frslcqj3ysmkxc5tmps2aqnfu Modules/Filtering/AnisotropicDiffusionLBR/test/Input/mrbrain_noisy.vtk +bafkreia52ajz3mxwv5rusp33a6mcl7mphp772zkzqthc2xwlb7rmn6fsyy Testing/Data/Baseline/Filtering/CurvatureAnisotropicDiffusionImageFilter.2.png +bafybeidgydpaoeu6qv4jupn3apal7ri47zr2q2qar435d3l4mdri66opby Wrapping/images/warp3D.nii.gz diff --git a/Utilities/Maintenance/ExternalDataUpload/README.md b/Utilities/Maintenance/ExternalDataUpload/README.md new file mode 100644 index 00000000000..7c670b97c60 --- /dev/null +++ b/Utilities/Maintenance/ExternalDataUpload/README.md @@ -0,0 +1,257 @@ +# ITK External Data Upload + +Upload large test images and baselines to [Filebase] IPFS storage, optionally +mirror them into the +[`ITKTestingData`](https://github.com/InsightSoftwareConsortium/ITKTestingData) +repository, and 
replace the original with a lightweight `.cid` content link +committed to the ITK source tree. + +This complements [`CMake/ITKExternalData.cmake`](../../../CMake/ITKExternalData.cmake), +which fetches content at test configure time from the gateways listed there +(`ITKTestingData` on GitHub Pages, `data.kitware.com`, `itk.org`, local Kubo +gateway, `ipfs.io`, `gateway.pinata.cloud`, `cloudflare-ipfs.com`, +`dweb.link`). + +## How the upload works + +Uploads go directly to a Filebase IPFS bucket over Filebase's +S3-compatible REST API. A local +[Kubo](https://github.com/ipfs/kubo) daemon, IPFS Desktop, or any +configured `ipfs pin remote` PSA service is **not** required. + +For each upload the helper script: + +1. Packs the file into a CARv1 with `npx ipfs-car pack --no-wrap`. ipfs-car + v1+ defaults to **1 MiB chunks, 1024 children per node, raw leaves, + CIDv1**, which is exactly the [unixfs-v1-2025] / IPIP-0499 profile, so + the CID is reproducible across implementations and matches what other + contributors and CI compute for the same content. +2. PUTs the CAR to the configured Filebase bucket with the + `x-amz-meta-import: car` header. Filebase imports the CAR and pins the + resulting CID server-side, exposing it via `head_object` metadata. +3. Reads the CID back from `head_object` and verifies it matches the local + CID. A mismatch aborts the upload. +4. Writes `.cid`, removes the original file, appends/updates an entry + in `Testing/Data/content-links.manifest`, and (with + `--testing-data-repo`) copies the bytes into a local `ITKTestingData` + clone for the GitHub Pages CDN mirror. + +[unixfs-v1-2025]: https://github.com/ipfs/specs/blob/main/IPIP/0499-unixfs-v1-2025-profile.md + +## One-Time Developer Setup + +### 1. 
Install the pixi environment + +The upload helpers run on top of a small pixi environment that brings in +[boto3] for the Filebase S3 calls, Node.js for `npx ipfs-car`, and +`requests` for the gateway-fetch verification path used by `normalize.py`. +From the ITK source tree: + +```bash +pixi install -e external-data-upload +``` + +[boto3]: https://boto3.amazonaws.com/ + +That installs everything into `.pixi/envs/external-data-upload/`. Verify: + +```bash +pixi run -e external-data-upload python --version +pixi run -e external-data-upload node --version +pixi run -e external-data-upload npx --yes ipfs-car --version +``` + +The first `npx ipfs-car` invocation downloads the package into the npm +cache; subsequent runs are offline. + +### 2. Create a Filebase IPFS bucket and S3 keys + +1. Sign up at <https://filebase.com/> (the free tier supports + pin-by-CID via the S3 import path). +2. Create an **IPFS bucket** at <https://console.filebase.com/buckets>. + The bucket name is local to your account — the published CID is the + only thing other contributors need to retrieve the bytes. +3. Create an S3 access key for that bucket at + <https://console.filebase.com/keys>. Filebase ties keys to a single + bucket, so the access key + secret you receive can only see and + write to that bucket. + +### 3. Export the credentials + +The helper scripts read three environment variables: + +```bash +export FILEBASE_ACCESS_KEY=... # S3 access key +export FILEBASE_SECRET_KEY=... # S3 secret key +export FILEBASE_BUCKET=itk-data # bucket name from step 2 +``` + +Add the exports to your shell profile or a `.env` file you source before +uploads. **Do not** commit credentials to the repository. + +## Usage + +### Upload a single file + +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/upload.py <path-to-data-file> +``` + +The script will: + +1. Pack the file into a CAR (CIDv1, unixfs-v1-2025 profile) +2. Upload the CAR to your Filebase IPFS bucket and verify the CID +3. Replace the original file with `<file>.cid` containing the CID +4.
Append/update an entry in `Testing/Data/content-links.manifest` +5. Print the `git rm` / `git add` commands to stage the change + +### Also mirror the bytes to `ITKTestingData` + +Pass `--testing-data-repo ` to additionally copy the file into a local +clone of +[`ITKTestingData`](https://github.com/InsightSoftwareConsortium/ITKTestingData) +at `CID/` and `git add` it there. This populates the +`https://insightsoftwareconsortium.github.io/ITKTestingData/CID/` mirror +gateway already listed in `CMake/ITKExternalData.cmake`. + +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/upload.py \ + --testing-data-repo ~/src/ITKTestingData \ + Testing/Data/Input/brain.nii.gz +``` + +**GitHub 50 MB file size limit.** `ITKTestingData` is hosted on GitHub, which +hard-rejects pushes containing files larger than **50 MB** per file. The upload +script checks the file size before mirroring and refuses to copy files over +50 MB into the `ITKTestingData` tree. The Filebase upload still proceeds for +oversized files — the mirror step is the only one that gets skipped, with a +clear warning. + +Commit the staged `CID/` file in `ITKTestingData` and push; the +`gh-pages` workflow on that repo republishes the new file at the GitHub Pages +mirror gateway. + +### Normalize existing content links to CID + +`.md5` / `.sha256` / `.sha512` content links can be converted to `.cid`, and +existing `.cid` links can be regenerated under the unixfs-v1-2025 profile (in +case they were originally produced with older chunker defaults). + +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/normalize.py +``` + +The script will, for each content link under the given path: + +1. Fetch the bytes through the gateways in `CMake/ITKExternalData.cmake` + (same order the build uses, so a gateway CI can't reach is a gateway + this script won't accept). +2. 
Verify the fetched bytes against the declared hash (for `.md5` / `.shaNNN` + links) or the declared CID (for `.cid` links — accepted only when fetched + via an IPFS HTTP gateway, which verifies server-side). +3. Re-materialize the actual file next to the content link, then call the + Filebase uploader so the new CID is produced under the unixfs-v1-2025 + profile and (if `--testing-data-repo` is passed) mirrored into + `ITKTestingData`. The old `.md5` / `.sha256` / `.sha512` link is + removed; a `.cid` link is written in its place. + +Common options: + +```bash +# Dry run — report what would change, modify nothing. +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/normalize.py Modules/Filtering/Foo --dry-run + +# Also mirror bytes into a local ITKTestingData checkout. +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/normalize.py Testing/Data/Input \ + --testing-data-repo ~/src/ITKTestingData + +# Only process files that are currently .md5 / .shaNNN (skip existing .cid). +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/normalize.py Modules --hash-only +``` + +## Content Link Manifest + +`Testing/Data/content-links.manifest` is a plain-text index of every CID the +upload script has produced. One entry per line: + +```text + +``` + +Example: + +```text +bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi Testing/Data/Input/brain.nii.gz +bafkreihvlpx2z3xyhmhegrqo6vn4balcm3gkskdigoyl3i5v7iq5mhtaee Testing/Data/Baseline/Filtering/brain-diff.mha +``` + +Rules: + +- `` is a repo-relative path and **must not contain whitespace** — + the manifest uses a single space as the field delimiter. Rename files with + spaces before uploading. +- `upload.py` maintains this file automatically: entries are added on first + upload and replaced on re-upload. The data lines are sorted by path for a + minimal review diff; comment lines at the top are preserved. 
+- The manifest should be committed alongside the `.cid` files the upload + produced. + +## How `.cid` Files Work + +A `.cid` file is a single-line plain-text file containing one IPFS CIDv1, +base32-encoded. Example: + +```text +bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi +``` + +ITK's CMake layer recognises the `.cid` extension via +`ExternalData_URL_ALGO_CID_lower` in +[`CMake/ITKExternalData.cmake`](../../../CMake/ITKExternalData.cmake). At +configure time, `ExternalData.cmake` substitutes the CID into each +`ExternalData_URL_TEMPLATES` entry (local Kubo gateway, `ipfs.io`, +`gateway.pinata.cloud`, `cloudflare-ipfs.com`, `dweb.link`, plus the +`ITKTestingData` GitHub Pages mirror) and downloads from the first one that +responds. The downloaded content is cached in +`ExternalData_OBJECT_STORES` under `cid/`. + +Because CIDs are content-addressed, a corrupt download is detected +automatically: a gateway that returns the wrong bytes will produce a different +CID, and the cache lookup misses. + +## Troubleshooting + +### `ERROR: 'npx' not found on PATH` + +The pixi environment is not active. Run the helpers via `pixi run -e +external-data-upload python ...`, or activate the environment first with +`pixi shell -e external-data-upload`. + +### `ERROR: Missing Filebase credentials` + +Export `FILEBASE_ACCESS_KEY`, `FILEBASE_SECRET_KEY`, and `FILEBASE_BUCKET` +(or pass `--bucket`) before running the upload script. See setup step 3. + +### `Filebase did not return a CID for ...` + +The CAR was uploaded but Filebase did not import it. Common causes: + +- The bucket is a regular S3 bucket, not an **IPFS** bucket — recreate at + . +- The S3 access key is read-only or scoped to a different bucket. +- Filebase rate-limited the request — retry after a few seconds. + +### `CID mismatch: local=... filebase=...` + +The CID this client computed (via `npx ipfs-car`) and the CID Filebase +reported after import disagree. 
This indicates a chunker/profile drift +between the local ipfs-car version and Filebase's importer. Confirm +`pixi run -e external-data-upload npx ipfs-car --version` is v1 or newer, +then retry; if the mismatch persists, file an issue and include both CIDs +in the report. diff --git a/Utilities/Maintenance/ExternalDataUpload/SKILL.md b/Utilities/Maintenance/ExternalDataUpload/SKILL.md new file mode 100644 index 00000000000..f90bb573430 --- /dev/null +++ b/Utilities/Maintenance/ExternalDataUpload/SKILL.md @@ -0,0 +1,133 @@ +--- +name: external-data-upload +description: > + Upload ITK test data to Filebase IPFS storage and produce .cid content + links via the S3 REST API + npx ipfs-car (no Kubo daemon required), + optionally mirror into ITKTestingData, and normalize existing + .md5 / .sha256 / .cid content links. Use when the user wants to add + test images, baseline data, or model files under Testing/Data/ or a + module's data/ directory, or when asked to convert hash-based content + links to CID. +allowed-tools: + - Bash + - Read +--- + +# ITK External Data Upload + +Upload a file to Filebase IPFS storage and replace it with a `.cid` content +link, maintain `Testing/Data/content-links.manifest`, and (optionally) mirror +the bytes into `ITKTestingData` for the GitHub Pages gateway. Also: regenerate +existing `.md5` / `.sha256` / `.cid` content links under the unixfs-v1-2025 +profile. + +## Prerequisites + +The developer must have the `external-data-upload` pixi environment installed +and Filebase credentials exported. If not, direct them to +[`README.md`](./README.md) in this directory. 
+ +Required: + +- pixi environment installed: `pixi install -e external-data-upload` +- Filebase IPFS bucket with an S3 access key +- Environment variables exported: `FILEBASE_ACCESS_KEY`, + `FILEBASE_SECRET_KEY`, `FILEBASE_BUCKET` + +A local Kubo daemon, IPFS Desktop, or any `ipfs pin remote` PSA service is +**not** required — the upload talks to Filebase's S3 REST API directly and +relies on `npx ipfs-car` (installed via Node.js in the pixi environment) for +local CAR construction. + +## Tasks this skill handles + +### 1. Upload a single file + +Run the upload script via pixi: + +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/upload.py +``` + +If the user mentions `ITKTestingData` or asks you to mirror the bytes to +GitHub Pages, pass `--testing-data-repo `: + +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/upload.py \ + --testing-data-repo \ + +``` + +The script will: + +1. Pack the file into a CARv1 with `npx ipfs-car pack --no-wrap` + (defaults match the unixfs-v1-2025 profile) +2. Upload the CAR to the Filebase IPFS bucket via boto3 with + `Metadata={"import": "car"}` and verify the CID returned by + `head_object` matches the local CID +3. If `--testing-data-repo` given and file ≤ 50 MB, copy to + `/CID/` and `git add` it there. Files over 50 MB are skipped + for the mirror step only (GitHub rejects > 50 MB) — Filebase pinning still + succeeds. +4. Replace the source file with `.cid` +5. Update `Testing/Data/content-links.manifest` + +### 2. Normalize existing content links + +Use when the user wants to convert `.md5` / `.sha256` / `.sha512` links to +`.cid`, or re-generate `.cid` links under the unixfs-v1-2025 profile. 
+ +```bash +pixi run -e external-data-upload python \ + Utilities/Maintenance/ExternalDataUpload/normalize.py <path> +``` + +Useful options: + +- `--dry-run` — report what would change +- `--hash-only` — only touch `.md5` / `.shaNNN` links, leave `.cid` alone +- `--cid-only` — only re-hash existing `.cid` links under the new profile +- `--testing-data-repo <path>` — forwarded to the upload helper +- `--bucket <name>` — Filebase bucket override (default: `$FILEBASE_BUCKET`) + +The normalize script fetches bytes through the gateway templates in +`CMake/ITKExternalData.cmake` (same order as the build), verifies them +against the declared hash or CID, and calls `upload.upload_file_to_filebase` +to produce the new `.cid`. + +## After Upload + +Stage the git changes the upload script prints. Typical ITK workflow: + +```bash +git rm <file> +git add <file>.cid +git add Testing/Data/content-links.manifest +``` + +If `--testing-data-repo` was used, follow the printed commands in that repo: + +```bash +git -C <testing-data-repo> commit -m "Add <file> (<CID>)" +git -C <testing-data-repo> push +``` + +Commit the ITK changes with an appropriate prefix per +[`Documentation/AI/git-commits.md`](../../../Documentation/AI/git-commits.md): + +- `ENH:` for new test data +- `STYLE:` for normalizing existing content links (no test semantics change) + +## How `.cid` Files Work + +A `.cid` file is one line of plain text: a CIDv1, base32-encoded. ITK's +`CMake/ITKExternalData.cmake` recognises the `.cid` extension and fetches +through the gateway list declared there (local Kubo, `ipfs.io`, +`gateway.pinata.cloud`, `cloudflare-ipfs.com`, `dweb.link`, plus the +`ITKTestingData` GitHub Pages mirror at +`insightsoftwareconsortium.github.io/ITKTestingData/<CID>/<filename>`). + +Because CIDs are content-addressed, integrity is verified automatically at +fetch time.
diff --git a/Utilities/Maintenance/ExternalDataUpload/normalize.py b/Utilities/Maintenance/ExternalDataUpload/normalize.py new file mode 100755 index 00000000000..6590b7a6672 --- /dev/null +++ b/Utilities/Maintenance/ExternalDataUpload/normalize.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python3 +"""Normalize ITK content links: convert ``.md5`` / ``.shaNNN`` links to ``.cid``. + +For each content link found, the script: + +1. Fetches bytes via the gateway templates declared in + ``CMake/ITKExternalData.cmake`` (identical order to the build). +2. Verifies the bytes against the declared hash or CID. +3. Re-materializes the file alongside the link, then runs the Filebase + uploader from ``upload.py`` so a fresh CID is produced under the + ``unixfs-v1-2025`` profile and pinned on Filebase. The old + ``.md5`` / ``.shaNNN`` link is removed; a ``.cid`` link is written + in its place. + +For ``.cid`` content links, this re-uploads under the current +``unixfs-v1-2025`` profile so a CID produced years ago with a different +chunker is regenerated to match the build pipeline. +""" + +from __future__ import annotations + +import argparse +import hashlib +import re +import subprocess +import sys +import tempfile +import urllib.parse +from pathlib import Path + +import upload as upload_module +from upload import ( + REPO_ROOT, + upload_file_to_filebase, + update_manifest, + mirror_to_testing_data, + CIDV1_RE, +) + +CMAKE_FILE = REPO_ROOT / "CMake" / "ITKExternalData.cmake" + +ALGO_UC = { + "md5": "MD5", + "sha1": "SHA1", + "sha224": "SHA224", + "sha256": "SHA256", + "sha384": "SHA384", + "sha512": "SHA512", + "cid": "cid", +} + + +def parse_url_templates(cmake_file: Path) -> list[str]: + """Extract URL templates from the ``ExternalData_URL_TEMPLATES`` list(). 
+ + Locates the ``list(APPEND ExternalData_URL_TEMPLATES ...)`` invocation, + walks its argument list with a paren-aware scanner (templates contain + ``%(hash)`` / ``%(algo)``, so naive ``.*?`` regex closes the match + prematurely on those inner parens), and returns every quoted argument + that contains ``%(hash)`` in declaration order. + """ + text = cmake_file.read_text() + anchor = re.search( + r"list\s*\(\s*APPEND\s+ExternalData_URL_TEMPLATES\s", + text, + ) + if anchor is None: + sys.exit( + f"ERROR: failed to find ExternalData_URL_TEMPLATES list() in " + f"{cmake_file}" + ) + + depth = 1 + i = anchor.end() + in_string = False + end_idx: int | None = None + while i < len(text): + ch = text[i] + if in_string: + if ch == "\\": + i += 2 + continue + if ch == '"': + in_string = False + else: + if ch == '"': + in_string = True + elif ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + if depth == 0: + end_idx = i + break + i += 1 + + if end_idx is None: + sys.exit( + f"ERROR: unterminated ExternalData_URL_TEMPLATES list() in " + f"{cmake_file}" + ) + + body = text[anchor.end() : end_idx] + return [t for t in re.findall(r'"([^"]+)"', body) if "%(hash)" in t] + + +def render_url(template: str, algo: str, value: str) -> str: + return template.replace("%(algo)", algo).replace("%(hash)", value) + + +def hash_bytes(ext: str, data: bytes) -> str: + return hashlib.new(ext, data).hexdigest() + + +def fetch_and_verify(ext: str, value: str, templates: list[str]) -> Path: + """Download bytes from the first gateway whose response verifies; return tempfile path. + + For ``.cid`` links, accept any successful HTTP fetch from a path containing + ``/ipfs/`` because IPFS HTTP gateways verify CIDs server-side. For hash + links, recompute the digest locally. 
+ """ + import requests # imported lazily so --help works without the env active + + algo_uc = ALGO_UC.get(ext) + if algo_uc is None: + raise RuntimeError(f"Unknown content-link extension: .{ext}") + + last_error: Exception | None = None + for template in templates: + rendered = render_url(template, algo_uc, value) + if ext != "cid" and "/ipfs/" in urllib.parse.urlparse(rendered).path: + continue + try: + response = requests.get(rendered, timeout=(10, 120)) + response.raise_for_status() + except requests.RequestException as exc: + last_error = exc + continue + + body = response.content + if not body: + continue + + if ext == "cid": + if "/ipfs/" not in urllib.parse.urlparse(rendered).path: + # Non-IPFS origin (e.g. GitHub Pages mirror). We can't verify + # locally without risking chunker-drift false negatives, so + # we keep looking for an IPFS gateway entry. + continue + else: + actual = hash_bytes(ext, body) + if actual.lower() != value.lower(): + print( + f"WARN: content from {rendered} did not verify; " + "trying next gateway", + file=sys.stderr, + ) + continue + + out = Path(tempfile.mkstemp(prefix="itk-content-link.")[1]) + out.write_bytes(body) + return out + + raise RuntimeError( + f"Failed to fetch and verify {ext}={value} from any gateway" + + (f" (last error: {last_error})" if last_error else "") + ) + + +def enumerate_links(target: Path, hash_only: bool, cid_only: bool) -> list[Path]: + if target.is_file(): + return [target] + exts = {f".{e}" for e in upload_module.CONTENT_LINK_EXTS} + found = sorted(p for p in target.rglob("*") if p.is_file() and p.suffix in exts) + filtered: list[Path] = [] + for link in found: + ext = link.suffix.lstrip(".") + if hash_only and ext == "cid": + continue + if cid_only and ext != "cid": + continue + filtered.append(link) + return filtered + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description=( + "Normalize ITK content links: convert .md5 / .shaNNN to .cid and " + 
"regenerate existing .cid under the unixfs-v1-2025 profile." + ), + ) + parser.add_argument("target", help="Path or directory to process") + parser.add_argument( + "--testing-data-repo", + metavar="PATH", + help="Forwarded to upload.py; mirror bytes into a local ITKTestingData clone.", + ) + parser.add_argument( + "--bucket", + help="Filebase IPFS bucket (default: $FILEBASE_BUCKET).", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="List what would change, modify nothing.", + ) + mode = parser.add_mutually_exclusive_group() + mode.add_argument( + "--hash-only", + action="store_true", + help="Process only .md5 / .shaNNN links; leave .cid alone.", + ) + mode.add_argument( + "--cid-only", + action="store_true", + help="Process only .cid links (re-hash under unixfs-v1-2025).", + ) + args = parser.parse_args(argv) + + target = Path(args.target) + if not target.exists(): + sys.exit(f"ERROR: not found: {args.target}") + if not CMAKE_FILE.exists(): + sys.exit(f"ERROR: cannot find {CMAKE_FILE}") + + upload_module._check_node_available() + access_key, secret_key, bucket = upload_module._credentials(args) + + testing_data_repo: Path | None = None + if args.testing_data_repo: + testing_data_repo = Path(args.testing_data_repo).resolve() + if not (testing_data_repo / ".git").exists(): + sys.exit( + f"ERROR: --testing-data-repo is not a git checkout: " + f"{args.testing_data_repo}" + ) + + templates = parse_url_templates(CMAKE_FILE) + print(f"==> Loaded {len(templates)} gateway template(s) from {CMAKE_FILE}") + + links = enumerate_links(target, args.hash_only, args.cid_only) + if not links: + print(f"No matching content links under {target}. 
Nothing to do.") + return 0 + print(f"==> Processing {len(links)} content link(s)...") + if args.dry_run: + print("(--dry-run: no files will be modified)") + + fail = 0 + for link in links: + ext = link.suffix.lstrip(".") + value = link.read_text().strip() + real_file = link.with_suffix("") + if not value: + print(f"FAIL {link} empty-content-link", file=sys.stderr) + fail += 1 + continue + if ext == "cid" and not CIDV1_RE.match(value): + print(f"FAIL {link} invalid-cid", file=sys.stderr) + fail += 1 + continue + + if args.dry_run: + print( + f"WOULD-NORMALIZE {link} ({ext}={value}) -> {real_file}.cid" + ) + continue + + print(f"==> Normalizing {link} ({ext}={value})") + + if real_file.exists(): + sys.exit( + f"ERROR: refusing to normalize: {real_file} already exists. " + "Delete or move it first." + ) + + try: + tmp_bytes = fetch_and_verify(ext, value, templates) + except RuntimeError as exc: + print(f"FAIL {link} {exc}", file=sys.stderr) + fail += 1 + continue + + tmp_bytes.rename(real_file) + link.unlink() + + try: + cid = upload_file_to_filebase(real_file, bucket, access_key, secret_key) + except (subprocess.CalledProcessError, RuntimeError) as exc: + print(f"FAIL {link} upload-failed: {exc}", file=sys.stderr) + link.write_text(value + "\n") + real_file.unlink(missing_ok=True) + fail += 1 + continue + + if testing_data_repo is not None: + mirror_to_testing_data(real_file, cid, testing_data_repo) + + cid_path = real_file.with_name(real_file.name + ".cid") + cid_path.write_text(cid + "\n") + real_file.unlink() + rel_path = real_file.relative_to(REPO_ROOT).as_posix() + update_manifest(cid, rel_path) + print(f"NORMALIZE {link} ({ext}) -> {cid_path}") + + if fail: + print(f"WARN: {fail} content link(s) failed to normalize.", file=sys.stderr) + return 2 + + print( + "Done. Review changes and commit as a STYLE: commit " + "(see Documentation/AI/git-commits.md)." 
+ ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/Utilities/Maintenance/ExternalDataUpload/upload.py b/Utilities/Maintenance/ExternalDataUpload/upload.py new file mode 100755 index 00000000000..b6716035407 --- /dev/null +++ b/Utilities/Maintenance/ExternalDataUpload/upload.py @@ -0,0 +1,341 @@ +#!/usr/bin/env python3 +"""Upload a file to Filebase IPFS storage and replace it with a .cid content link. + +Builds a CARv1 of the input via ``npx ipfs-car pack`` (defaults: 1 MiB chunks, +1024 children per node, raw leaves, CIDv1 — matches the ``unixfs-v1-2025`` / +IPIP-0499 profile so CIDs are reproducible across implementations) and uploads +the CAR to a Filebase IPFS bucket through its S3-compatible REST API with the +``import: car`` user metadata header. Filebase imports the CAR server-side and +returns the resulting CID in object metadata, which is read back via +``head_object`` and compared against the locally computed root CID. + +The ``.cid`` content link, the manifest entry in +``Testing/Data/content-links.manifest``, and (optionally) a mirror of the bytes +in a local ITKTestingData clone are all produced in the same invocation. +""" + +from __future__ import annotations + +import argparse +import os +import re +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[3] +MANIFEST = REPO_ROOT / "Testing" / "Data" / "content-links.manifest" +GITHUB_FILE_LIMIT_BYTES = 50 * 1024 * 1024 +FILEBASE_ENDPOINT = "https://s3.filebase.com" + +CONTENT_LINK_EXTS = ("cid", "md5", "sha1", "sha224", "sha256", "sha384", "sha512") +CIDV1_RE = re.compile(r"^baf[a-z0-9]{50,}$") + + +def build_car(input_path: Path, output_car: Path) -> str: + """Pack ``input_path`` into a CARv1 at ``output_car`` and return its root CID. + + Uses ``npx ipfs-car pack`` with ``--no-wrap`` (single-file upload, no + wrapping directory). 
ipfs-car v1+ defaults match the unixfs-v1-2025 profile + (1 MiB chunks, 1024 links/node, raw leaves, CIDv1), so no extra flags are + needed to produce a reproducible CID. + """ + result = subprocess.run( + [ + "npx", + "--yes", + "ipfs-car", + "pack", + str(input_path), + "--no-wrap", + "--output", + str(output_car), + ], + capture_output=True, + text=True, + check=True, + ) + for line in reversed(result.stdout.splitlines()): + token = line.strip().split()[-1] if line.strip() else "" + if CIDV1_RE.match(token): + return token + raise RuntimeError( + f"Could not parse CID from `npx ipfs-car pack` output:\n{result.stdout}" + ) + + +def upload_car_to_filebase( + car_path: Path, + bucket: str, + object_key: str, + access_key: str, + secret_key: str, +) -> str: + """Upload a CAR to a Filebase IPFS bucket and return the CID Filebase reports. + + Setting ``Metadata={"import": "car"}`` tells Filebase to import the CAR + server-side; the imported root CID is then exposed via + ``head_object()['Metadata']['cid']``. ``put_object`` is used directly + rather than ``upload_file`` because the latter's multipart code path can + strip user metadata on small payloads. 
+ """ + import boto3 # imported lazily so --help works without the env active + + s3 = boto3.client( + "s3", + endpoint_url=FILEBASE_ENDPOINT, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name="us-east-1", + ) + with car_path.open("rb") as f: + s3.put_object( + Bucket=bucket, + Key=object_key, + Body=f, + Metadata={"import": "car"}, + ) + head = s3.head_object(Bucket=bucket, Key=object_key) + return head.get("Metadata", {}).get("cid", "") + + +def upload_file_to_filebase( + input_path: Path, + bucket: str, + access_key: str, + secret_key: str, +) -> str: + """End-to-end: build CAR for ``input_path``, upload it, verify, return CID.""" + object_key = input_path.name + ".car" + with tempfile.NamedTemporaryFile(suffix=".car", delete=False) as tmp: + car_path = Path(tmp.name) + try: + local_cid = build_car(input_path, car_path) + remote_cid = upload_car_to_filebase( + car_path, bucket, object_key, access_key, secret_key + ) + if not remote_cid: + raise RuntimeError( + f"Filebase did not return a CID for {object_key}. The CAR may " + "not have been recognised — check the bucket is an IPFS bucket " + "and the access key has write permission." + ) + if local_cid != remote_cid: + raise RuntimeError( + f"CID mismatch: local={local_cid}, filebase={remote_cid}. " + "This indicates a chunker/profile drift between this client " + "and Filebase — file an issue." 
+ ) + return local_cid + finally: + car_path.unlink(missing_ok=True) + + +def update_manifest(cid: str, rel_path: str) -> None: + """Insert/replace ``cid `` in Testing/Data/content-links.manifest.""" + MANIFEST.parent.mkdir(parents=True, exist_ok=True) + + header_lines: list[str] = [] + data_lines: list[str] = [] + if MANIFEST.exists(): + for line in MANIFEST.read_text().splitlines(): + if line.startswith("#"): + header_lines.append(line) + elif line.strip(): + fields = line.split() + if len(fields) >= 2 and fields[1] == rel_path: + continue + data_lines.append(line) + else: + header_lines = [ + "# ITK content-link manifest", + "# One CID per line, format: ", + "# Maintained by Utilities/Maintenance/ExternalDataUpload/upload.py", + ] + + data_lines.append(f"{cid} {rel_path}") + data_lines.sort(key=lambda s: s.split()[1]) + + MANIFEST.write_text("\n".join(header_lines + data_lines) + "\n") + + +def mirror_to_testing_data( + file_path: Path, cid: str, testing_data_repo: Path +) -> bool: + """Copy ``file_path`` to ``/CID/`` and ``git add`` it. + + Returns False (with a warning) for files over GitHub's 50 MB push limit; + True after a successful copy + stage. + """ + size = file_path.stat().st_size + if size > GITHUB_FILE_LIMIT_BYTES: + print( + f"WARNING: {file_path.name} is {size} bytes (> 50 MB). GitHub " + "rejects pushes containing files > 50 MB, so it will NOT be " + "mirrored to ITKTestingData. 
The Filebase upload still succeeded; " + "the .cid content link will still be produced.", + file=sys.stderr, + ) + return False + + mirror_dir = testing_data_repo / "CID" + mirror_dir.mkdir(parents=True, exist_ok=True) + mirror_path = mirror_dir / cid + print(f"==> Mirroring to ITKTestingData: CID/{cid}") + shutil.copy2(file_path, mirror_path) + try: + subprocess.run( + ["git", "-C", str(testing_data_repo), "add", f"CID/{cid}"], + check=True, + ) + except subprocess.CalledProcessError: + mirror_path.unlink(missing_ok=True) + raise + return True + + +def _validate_input(file_arg: str) -> Path: + file_path = Path(file_arg) + if not file_path.exists(): + sys.exit(f"ERROR: File not found: {file_arg}") + if file_path.is_symlink(): + sys.exit( + f"ERROR: Symlink paths are not supported: {file_arg}\n" + " Pass the real file path instead." + ) + if not file_path.is_file(): + sys.exit(f"ERROR: Not a regular file: {file_arg}") + + abs_path = file_path.resolve() + try: + abs_path.relative_to(REPO_ROOT) + except ValueError: + sys.exit(f"ERROR: File must be inside the repository: {abs_path}") + + suffix = abs_path.suffix.lstrip(".") + if suffix in CONTENT_LINK_EXTS: + sys.exit(f"ERROR: File is already a .{suffix} content link: {file_arg}") + + rel = abs_path.relative_to(REPO_ROOT).as_posix() + if any(c.isspace() for c in rel): + sys.exit( + f"ERROR: Filepath contains whitespace, which is not supported: {rel}\n" + " Rename the file to remove spaces before uploading." 
+ ) + + return abs_path + + +def _credentials(args: argparse.Namespace) -> tuple[str, str, str]: + access_key = os.environ.get("FILEBASE_ACCESS_KEY", "") + secret_key = os.environ.get("FILEBASE_SECRET_KEY", "") + bucket = args.bucket or os.environ.get("FILEBASE_BUCKET", "") + missing = [ + name + for name, value in [ + ("FILEBASE_ACCESS_KEY", access_key), + ("FILEBASE_SECRET_KEY", secret_key), + ("FILEBASE_BUCKET (or --bucket)", bucket), + ] + if not value + ] + if missing: + sys.exit( + "ERROR: Missing Filebase credentials: " + ", ".join(missing) + "\n" + " See: Utilities/Maintenance/ExternalDataUpload/README.md" + ) + return access_key, secret_key, bucket + + +def _check_node_available() -> None: + if shutil.which("npx") is None: + sys.exit( + "ERROR: 'npx' not found on PATH (Node.js required for ipfs-car).\n" + " Run inside the pixi environment:\n" + " pixi run -e external-data-upload python " + "Utilities/Maintenance/ExternalDataUpload/upload.py " + ) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description=( + "Upload a file to Filebase IPFS storage; produce a .cid content " + "link, update Testing/Data/content-links.manifest, and optionally " + "mirror the bytes into ITKTestingData." + ), + ) + parser.add_argument("file", help="Path to the file to upload") + parser.add_argument( + "--testing-data-repo", + metavar="PATH", + help=( + "Local clone of github.com/InsightSoftwareConsortium/ITKTestingData; " + "files ≤ 50 MB are copied to /CID/ and `git add`ed." 
+ ), + ) + parser.add_argument( + "--bucket", + help="Filebase IPFS bucket (default: $FILEBASE_BUCKET).", + ) + args = parser.parse_args(argv) + + _check_node_available() + abs_path = _validate_input(args.file) + access_key, secret_key, bucket = _credentials(args) + + testing_data_repo: Path | None = None + if args.testing_data_repo: + testing_data_repo = Path(args.testing_data_repo).resolve() + if not testing_data_repo.is_dir(): + sys.exit( + f"ERROR: --testing-data-repo path is not a directory: " + f"{args.testing_data_repo}" + ) + if not (testing_data_repo / ".git").exists(): + sys.exit( + f"ERROR: --testing-data-repo is not a git checkout: " + f"{args.testing_data_repo}" + ) + + rel_path = abs_path.relative_to(REPO_ROOT).as_posix() + print(f"==> Packing {abs_path.name} into a CAR (unixfs-v1-2025 profile)...") + print(f"==> Uploading to Filebase bucket {bucket!r}...") + cid = upload_file_to_filebase(abs_path, bucket, access_key, secret_key) + print(f" CID: {cid}") + + mirrored = False + if testing_data_repo is not None: + mirrored = mirror_to_testing_data(abs_path, cid, testing_data_repo) + + cid_path = abs_path.with_name(abs_path.name + ".cid") + cid_path.write_text(cid + "\n") + abs_path.unlink() + + update_manifest(cid, rel_path) + + rel_cid = cid_path.relative_to(REPO_ROOT).as_posix() + print() + print("==> Upload complete.") + print(f" CID: {cid}") + print(f" Link: {cid_path}") + print() + print("Next steps (ITK repository):") + print(f' git rm "{rel_path}"') + print(f' git add "{rel_cid}"') + print(" git add Testing/Data/content-links.manifest") + if mirrored and testing_data_repo is not None: + print() + print(f"Next steps (ITKTestingData repository at {testing_data_repo}):") + print( + f' git -C "{testing_data_repo}" commit ' + f'-m "Add {abs_path.name} ({cid})"' + ) + print(f' git -C "{testing_data_repo}" push') + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index 
1ab39fd4202..e8b2126ba5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,11 @@ clean = { cmd = "git clean -fdx", description = "Clean the repository" } [tool.pixi.dependencies] python = ">=3.13.5,<3.14" +[tool.pixi.feature.external-data-upload.dependencies] +boto3 = ">=1.34" +nodejs = ">=20" +requests = ">=2.32" + [tool.pixi.feature.pre-commit.dependencies] pre-commit = ">=4.1.0,<5" @@ -173,3 +178,4 @@ dev = ["dev"] cxx = ["dev", "cxx"] python = ["python", "dev"] pre-commit = ["pre-commit"] +external-data-upload = ["external-data-upload"]