diff --git a/Gemfile b/Gemfile index 66ceee92..289cbdc0 100644 --- a/Gemfile +++ b/Gemfile @@ -36,6 +36,7 @@ gem 'sentry-ruby' gem 'simple_form' gem 'skylight' gem 'terser' +gem 'webmock' gem 'zip_tricks' group :production do diff --git a/Gemfile.lock b/Gemfile.lock index f6647010..8c5b5136 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -154,6 +154,9 @@ GEM cocoon (1.2.15) concurrent-ruby (1.3.5) connection_pool (2.5.3) + crack (1.0.0) + bigdecimal + rexml crass (1.0.6) date (3.4.1) delayed_job (4.1.13) @@ -182,6 +185,7 @@ GEM terminal-table (>= 1.8) globalid (1.2.1) activesupport (>= 6.1) + hashdiff (1.2.0) hashie (5.0.0) i18n (1.14.7) concurrent-ruby (~> 1.0) @@ -445,6 +449,10 @@ GEM activemodel (>= 6.0.0) bindex (>= 0.4.0) railties (>= 6.0.0) + webmock (3.25.1) + addressable (>= 2.8.0) + crack (>= 0.3.2) + hashdiff (>= 0.4.0, < 2.0.0) websocket (1.2.11) websocket-driver (0.7.7) base64 @@ -509,6 +517,7 @@ DEPENDENCIES terser timecop web-console + webmock zip_tricks RUBY VERSION diff --git a/README.md b/README.md index d2da0fc0..47593781 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,19 @@ polling by specifying a longer queue wait time. Defaults to 10 if unset. `SQS_RESULT_IDLE_TIMEOUT` - Configures the :idle_timeout arg of the AWS poll method, which specifies the maximum time in seconds to wait for a new message before the polling loop exists. Defaults to 0 if unset. +### Archival Packaging Tool (APT) configuration + +The following environment variables are needed to communicate with [APT](https://github.com/MITLibraries/archival-packaging-tool), which is used in the +[preservation workflow](#publishing-workflow). + +`APT_CHALLENGE_SECRET` - Secret value used to authenticate requests to the APT Lambda endpoint. +`APT_VERBOSE` - If set to `true`, enables verbose logging for APT requests. +`APT_CHECKSUMS_TO_GENERATE` - Array of checksum algorithms to generate for files (default: ['md5']). 
+`APT_COMPRESS_ZIP` - Boolean value to indicate whether the output bag should be compressed as a zip +file (default: true). +`APT_S3_BUCKET` - S3 bucket URI where APT output bags are stored. +`APT_LAMBDA_URL` - The URL of the APT Lambda endpoint for preservation requests. + ### Email configuration `SMTP_ADDRESS`, `SMTP_PASSWORD`, `SMTP_PORT`, `SMTP_USER` - all required to send mail. @@ -397,15 +410,15 @@ Note: `Pending publication` is allowed here, but not expected to be a normal occ ## Preservation workflow The publishing workflow will automatically trigger preservation for all of the published theses in the results queue. -At this point a submission information package is generated for each thesis, then a bag is constructed, zipped, and -streamed to an S3 bucket. (See the SubmissionInformationPackage and SubmissionInformationPackageZipper classes for more -details on this part of the process.) -Once they are in the S3 bucket, the bags are automatically replicated to the Digital Preservation S3 bucket, where they -can be ingested into Archivematica. +At this point, the preservation job will generate an Archivematica payload for each thesis, which +is then POSTed to [APT](https://github.com/MITLibraries/archival-packaging-tool) for further processing. Each payload includes a metadata CSV and a JSON object containing structural information about the thesis files. + +Once the payloads are sent to APT, each thesis is structured as a BagIt bag and saved to an S3 +bucket, where they can be ingested into Archivematica. -A thesis can be sent to preservation more than once. In order to track provenance across multiple preservation events, -we persist certain data about the SIP and audit the model using `paper_trail`. +A thesis can be sent to preservation more than once. In order to track provenance across multiple preservation events, we persist certain data about the Archivematica payload and audit the model +using `paper_trail`. 
### Preserving a single thesis diff --git a/app/jobs/preservation_submission_job.rb b/app/jobs/preservation_submission_job.rb index df6c83aa..13ab5f1c 100644 --- a/app/jobs/preservation_submission_job.rb +++ b/app/jobs/preservation_submission_job.rb @@ -1,4 +1,7 @@ class PreservationSubmissionJob < ActiveJob::Base + require 'net/http' + require 'uri' + queue_as :default def perform(theses) @@ -6,8 +9,8 @@ def perform(theses) results = { total: theses.count, processed: 0, errors: [] } theses.each do |thesis| Rails.logger.info("Thesis #{thesis.id} is now being prepared for preservation") - sip = thesis.submission_information_packages.create! - preserve_sip(sip) + payload = thesis.archivematica_payloads.create! + preserve_payload(payload) Rails.logger.info("Thesis #{thesis.id} has been sent to preservation") results[:processed] += 1 rescue StandardError, Aws::Errors => e @@ -20,10 +23,34 @@ def perform(theses) private - def preserve_sip(sip) - SubmissionInformationPackageZipper.new(sip) - sip.preservation_status = 'preserved' - sip.preserved_at = DateTime.now - sip.save + def preserve_payload(payload) + post_payload(payload) + payload.preservation_status = 'preserved' + payload.preserved_at = DateTime.now + payload.save! 
+ end + + def post_payload(payload) + s3_url = ENV.fetch('APT_LAMBDA_URL', nil) + uri = URI.parse(s3_url) + request = Net::HTTP::Post.new(uri, { 'Content-Type' => 'application/json' }) + request.body = payload.payload_json + + response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http| + http.request(request) + end + + validate_response(response) + end + + def validate_response(response) + unless response.is_a?(Net::HTTPSuccess) + raise "Failed to post Archivematica payload to APT: #{response.code} #{response.body}" + end + + result = JSON.parse(response.body) + unless result['success'] == true + raise "APT failed to create a bag: #{response.body}" + end end end diff --git a/app/models/archivematica_payload.rb b/app/models/archivematica_payload.rb new file mode 100644 index 00000000..91efdf23 --- /dev/null +++ b/app/models/archivematica_payload.rb @@ -0,0 +1,101 @@ +# == Schema Information +# +# Table name: archivematica_payloads +# +# id :integer not null, primary key +# preservation_status :integer default("unpreserved"), not null +# payload_json :text +# preserved_at :datetime +# thesis_id :integer not null +# created_at :datetime not null +# updated_at :datetime not null +# +# This class assembles a payload to send to the Archival Packaging Tool (APT), which then creates a bag for +# preservation. It includes the thesis files, metadata, and checksums. The payload is then serialized to JSON +# for transmission. +# +# Instances of this class are invalid without an associated thesis that has a DSpace handle, a copyright, and +# at least one attached file with no duplicate filenames. +# +# There is some intentional duplication between this and the SubmissionInformationPackage model. The +# SubmissionInformationPackage is the legacy model that was used to create the bag, but it is not +# used in the current APT workflow. We are retaining it for historical purposes. 
+class ArchivematicaPayload < ApplicationRecord + include Checksums + include Baggable + + has_paper_trail + belongs_to :thesis + has_one_attached :metadata_csv + + validates :baggable?, presence: true + + before_create :set_metadata_csv, :set_payload_json + + enum preservation_status: %i[unpreserved preserved] + + private + + # compress_zip is cast to a boolean to override the string value from ENV. APT strictly requires + # a boolean for this field. + def build_payload + { + action: 'create-bagit-zip', + challenge_secret: ENV.fetch('APT_CHALLENGE_SECRET', nil), + verbose: ActiveModel::Type::Boolean.new.cast(ENV.fetch('APT_VERBOSE', false)), + input_files: build_input_files, + checksums_to_generate: ENV.fetch('APT_CHECKSUMS_TO_GENERATE', ['md5']), + output_zip_s3_uri: bag_output_uri, + compress_zip: ActiveModel::Type::Boolean.new.cast(ENV.fetch('APT_COMPRESS_ZIP', true)) + } + end + + # Build input_files array from thesis files and attached metadata CSV + def build_input_files + files = thesis.files.map { |file| build_file_entry(file) } + files << build_file_entry(metadata_csv) # Metadata CSV is the only file that is generated in this model + files + end + + # Build a file entry for each file, including the metadata CSV. + def build_file_entry(file) + { + uri: ["s3://#{ENV.fetch('AWS_S3_BUCKET')}", file.blob.key].join('/'), + filepath: set_filepath(file), + checksums: { + md5: base64_to_hex(file.blob.checksum) + } + } + end + + def set_filepath(file) + file == metadata_csv ? 'metadata/metadata.csv' : file.filename.to_s + end + + # The bag_name has to be unique due to our using it as the basis of an ActiveStorage key. Using a UUID + was not preferred as the target system of these bags adds its own UUID to the file when it arrives there + so the filename was unwieldy with two UUIDs embedded in it so we simply increment integers. 
+ def bag_name + safe_handle = thesis.dspace_handle.gsub('/', '_') + "#{safe_handle}-thesis-#{thesis.submission_information_packages.count + thesis.archivematica_payloads.count + 1}" + end + + # The bag_output_uri key is constructed to match the expected format for Archivematica. + def bag_output_uri + key = "etdsip/#{thesis.graduation_year}/#{thesis.graduation_month}-#{thesis.accession_number}/#{bag_name}.zip" + [ENV.fetch('APT_S3_BUCKET'), key].join('/') + end + + def baggable? + baggable_thesis?(thesis) + end + + def set_metadata_csv + csv_data = ArchivematicaMetadata.new(thesis).to_csv + metadata_csv.attach(io: StringIO.new(csv_data), filename: 'metadata.csv', content_type: 'text/csv') + end + + def set_payload_json + self.payload_json = build_payload.to_json + end +end diff --git a/app/models/submission_information_package.rb b/app/models/submission_information_package.rb index 893d0699..f440e5dc 100644 --- a/app/models/submission_information_package.rb +++ b/app/models/submission_information_package.rb @@ -14,6 +14,10 @@ # updated_at :datetime not null # +# This model is no longer used, but it is retained for historical purposes and to preserve existing +# data. Its functionality has been replaced by the ArchivematicaPayload model, which is used in the +# current preservation workflow. +# # Creates the structure for an individual thesis to be preserved in Archivematica according to the BagIt spec: # https://datatracker.ietf.org/doc/html/rfc8493. # diff --git a/app/models/submission_information_package_zipper.rb b/app/models/submission_information_package_zipper.rb deleted file mode 100644 index bdf179df..00000000 --- a/app/models/submission_information_package_zipper.rb +++ /dev/null @@ -1,54 +0,0 @@ -# SubmissionInformationPackageZipper creates a temporary zip file containing a bag and then attaches it via -# ActiveStorage. The `keygen` method creates the `path` to the file in S3. We are relying on S3 Replication to copy -# the objects from the ETD bucket to the Archivematica bucket. 
There is no automatic communication back from S3 or -# Archivematica to ETD, so we treat the successful save of the zip file as `preserved`. -class SubmissionInformationPackageZipper - def initialize(sip) - Tempfile.create([sip.bag_name.to_s, '.zip'], binmode: true) do |tmpfile| - bagamatic(sip, tmpfile) - - sip.bag.attach(io: File.open(tmpfile), - key: keygen(sip), - filename: "#{sip.bag_name}.zip", - content_type: 'application/zip') - end - end - - private - - # This key needs to be unique. By default, ActiveStorage generates a UUID, but since we want a file path for our - # Archivematica needs, we are generating a key. We handle uniqueness on the `bag_name` side. - def keygen(sip) - thesis = sip.thesis - "etdsip/#{thesis.graduation_year}/#{thesis.graduation_month}-#{thesis.accession_number}/#{sip.bag_name}.zip" - end - - # bagamatic takes a sip, creates a temporary zip file, and returns that file - def bagamatic(sip, tmpfile) - ZipTricks::Streamer.open(tmpfile) do |zip| - # bag manifest - zip.write_deflated_file('manifest-md5.txt') do |sink| - sink << sip.manifest - end - - # bag_declaration - zip.write_deflated_file('bagit.txt') do |sink| - sink << sip.bag_declaration - end - - # files. 
metadata.csv is just a string so we have to treat it differently than the binary stored files - sip.data.each do |data| - if data[1].is_a?(String) - zip.write_deflated_file(data[0]) do |sink| - sink << data[1] - end - else - zip.write_stored_file(data[0]) do |sink| - sink << data[1].download - end - end - end - end - tmpfile.close - end -end diff --git a/app/models/thesis.rb b/app/models/thesis.rb index 1c2ee202..2c2b7390 100644 --- a/app/models/thesis.rb +++ b/app/models/thesis.rb @@ -48,6 +48,7 @@ class Thesis < ApplicationRecord has_many :users, through: :authors has_many :submission_information_packages, dependent: :destroy + has_many :archivematica_payloads, dependent: :destroy has_many_attached :files has_one_attached :dspace_metadata diff --git a/config/environments/test.rb b/config/environments/test.rb index 0324c05f..03a8489b 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -40,9 +40,14 @@ ENV['SQS_RESULT_WAIT_TIME_SECONDS'] = '10' ENV['SQS_RESULT_IDLE_TIMEOUT'] = '0' ENV['AWS_REGION'] = 'us-east-1' + ENV['AWS_S3_BUCKET'] = 'fake-etd-bucket' ENV['DSPACE_DOCTORAL_HANDLE'] = '1721.1/999999' ENV['DSPACE_GRADUATE_HANDLE'] = '1721.1/888888' ENV['DSPACE_UNDERGRADUATE_HANDLE'] = '1721.1/777777' + ENV['APT_CHALLENGE_SECRET'] = 'fake-challenge-secret' + ENV['APT_S3_BUCKET'] = 's3://fake-apt-bucket' + ENV['APT_LAMBDA_URL'] = 'https://fake-lambda.example.com/' + ENV['APT_COMPRESS_ZIP'] = 'true' # While tests run files are not watched, reloading is not necessary. 
config.enable_reloading = false diff --git a/db/migrate/20250624182142_create_archivematica_payloads.rb b/db/migrate/20250624182142_create_archivematica_payloads.rb new file mode 100644 index 00000000..12ee7d29 --- /dev/null +++ b/db/migrate/20250624182142_create_archivematica_payloads.rb @@ -0,0 +1,13 @@ +class CreateArchivematicaPayloads < ActiveRecord::Migration[7.1] + def change + create_table :archivematica_payloads do |t| + t.integer :preservation_status, null: false, default: 0 + t.text :payload_json + t.datetime :preserved_at + + t.references :thesis, null: false, foreign_key: true + + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 1d1610c4..08e8f3a7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2025_02_21_202836) do +ActiveRecord::Schema[7.1].define(version: 2025_06_24_182142) do create_table "active_storage_attachments", force: :cascade do |t| t.string "name", null: false t.string "record_type", null: false @@ -65,6 +65,16 @@ t.index ["degree_period_id"], name: "index_archivematica_accessions_on_degree_period_id", unique: true end + create_table "archivematica_payloads", force: :cascade do |t| + t.integer "preservation_status", default: 0, null: false + t.text "payload_json" + t.datetime "preserved_at" + t.integer "thesis_id", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["thesis_id"], name: "index_archivematica_payloads_on_thesis_id" + end + create_table "authors", force: :cascade do |t| t.integer "user_id", null: false t.integer "thesis_id", null: false @@ -290,6 +300,7 @@ add_foreign_key "active_storage_attachments", "active_storage_blobs", column: "blob_id" add_foreign_key "active_storage_variant_records", "active_storage_blobs", column: "blob_id" add_foreign_key "archivematica_accessions", "degree_periods" + 
add_foreign_key "archivematica_payloads", "theses" add_foreign_key "authors", "theses" add_foreign_key "authors", "users" add_foreign_key "degrees", "degree_types" diff --git a/test/jobs/preservation_submission_job_test.rb b/test/jobs/preservation_submission_job_test.rb index 55108272..66aee414 100644 --- a/test/jobs/preservation_submission_job_test.rb +++ b/test/jobs/preservation_submission_job_test.rb @@ -1,4 +1,5 @@ require 'test_helper' +require 'webmock/minitest' class PreservationSubmissionJobTest < ActiveJob::TestCase @@ -16,7 +17,55 @@ def setup_thesis thesis end - test 'sends report emails' do + def setup_thesis_two + thesis = theses(:published_with_sip) + thesis.files = [] + thesis.save + file = Rails.root.join('test', 'fixtures', 'files', 'registrar_data_small_sample.csv') + thesis.files.attach(io: File.open(file), filename: 'registrar_data_small_sample.csv') + thesis + end + + def stub_apt_lambda_success + stub_request(:post, ENV['APT_LAMBDA_URL']) + .to_return( + status: 200, + body: { + success: true, + bag: { + entries: { + "manifest-md5.txt" => { md5: "fakehash" } + } + }, + output_zip_s3_uri: "s3://my-bucket/apt-testing/test-one-medium.zip" + }.to_json, + headers: { 'Content-Type' => 'application/json' } + ) + end + + def stub_apt_lambda_failure + stub_request(:post, ENV['APT_LAMBDA_URL']) + .to_return( + status: 500, + headers: { 'Content-Type' => 'application/json' } + ) + end + + def stub_apt_lambda_200_failure + stub_request(:post, ENV['APT_LAMBDA_URL']) + .to_return( + status: 200, + body: { + success: false, + error: "An error occurred (404) when calling the HeadObject operation: Not Found", + bag: { entries: {}}, + }.to_json, + headers: { 'Content-Type' => 'application/json' } + ) + end + + test 'sends report emails on success' do + stub_apt_lambda_success ClimateControl.modify DISABLE_ALL_EMAIL: 'false' do assert_difference('ActionMailer::Base.deliveries.size', 1) do PreservationSubmissionJob.perform_now([setup_thesis]) @@ -24,60 +73,118 @@ 
def setup_thesis end end - test 'creates a SIP' do + test 'sends report emails on failure' do + stub_apt_lambda_failure + ClimateControl.modify DISABLE_ALL_EMAIL: 'false' do + assert_difference('ActionMailer::Base.deliveries.size', 1) do + PreservationSubmissionJob.perform_now([setup_thesis]) + end + end + end + + test 'sends report email if post succeeds but APT fails' do + stub_apt_lambda_200_failure + ClimateControl.modify DISABLE_ALL_EMAIL: 'false' do + assert_difference('ActionMailer::Base.deliveries.size', 1) do + PreservationSubmissionJob.perform_now([setup_thesis]) + end + end + end + + test 'creates an Archivematica payload' do + stub_apt_lambda_success thesis = setup_thesis - assert_equal 0, thesis.submission_information_packages.count + assert_equal 0, thesis.archivematica_payloads.count PreservationSubmissionJob.perform_now([thesis]) - assert_equal 1, thesis.submission_information_packages.count + assert_equal 1, thesis.archivematica_payloads.count end - test 'creates multiple SIPs' do + test 'creates multiple Archivematica payloads' do + stub_apt_lambda_success thesis_one = setup_thesis thesis_two = theses(:engineer) - assert_equal 0, thesis_one.submission_information_packages.count - assert_equal 0, thesis_two.submission_information_packages.count + assert_equal 0, thesis_one.archivematica_payloads.count + assert_equal 0, thesis_two.archivematica_payloads.count theses = [thesis_one, thesis_two] PreservationSubmissionJob.perform_now(theses) - assert_equal 1, thesis_one.submission_information_packages.count - assert_equal 1, thesis_two.submission_information_packages.count + assert_equal 1, thesis_one.archivematica_payloads.count + assert_equal 1, thesis_two.archivematica_payloads.count PreservationSubmissionJob.perform_now(theses) - assert_equal 2, thesis_one.submission_information_packages.count - assert_equal 2, thesis_two.submission_information_packages.count + assert_equal 2, 
thesis_two.archivematica_payloads.count end test 'updates preservation_status to "preserved" after successfully processing a thesis' do + stub_apt_lambda_success thesis = setup_thesis PreservationSubmissionJob.perform_now([thesis]) - assert_equal 'preserved', thesis.submission_information_packages.last.preservation_status + assert_equal 'preserved', thesis.archivematica_payloads.last.preservation_status end test 'updates preserved_at to the current time after successfully processing a thesis' do + stub_apt_lambda_success time = DateTime.new.getutc Timecop.freeze(time) do thesis = setup_thesis PreservationSubmissionJob.perform_now([thesis]) - assert_equal time, thesis.submission_information_packages.last.preserved_at + assert_equal time, thesis.archivematica_payloads.last.preserved_at end end test 'throws exceptions when a thesis is unbaggable' do + stub_apt_lambda_failure assert_raises StandardError do PreservationSubmissionJob.perform_now([theses[:one]]) end + stub_apt_lambda_success assert_nothing_raised do PreservationSubmissionJob.perform_now([setup_thesis]) end end - test 'does not create a SIP if the job enters an error state' do - thesis = theses(:one) - assert_empty thesis.submission_information_packages + test 'does not create payloads if job fails' do + stub_apt_lambda_failure + bad_thesis = theses(:one) + good_thesis = setup_thesis + another_good_thesis = setup_thesis_two + assert_empty bad_thesis.archivematica_payloads + assert_empty good_thesis.archivematica_payloads + assert_empty another_good_thesis.archivematica_payloads - PreservationSubmissionJob.perform_now([thesis]) - assert_empty thesis.submission_information_packages + PreservationSubmissionJob.perform_now([good_thesis, bad_thesis, another_good_thesis]) + + # first thesis should succeed and have a payload + assert_equal 1, good_thesis.archivematica_payloads.count + + # second thesis should fail and not have a payload + assert_empty bad_thesis.archivematica_payloads + + # third thesis should 
succeed and have a payload, despite prior failure + assert_equal 1, another_good_thesis.archivematica_payloads.count + end + + test 'does not create payloads if post succeeds but APT fails' do + stub_apt_lambda_200_failure + bad_thesis = theses(:one) + good_thesis = setup_thesis + another_good_thesis = setup_thesis_two + assert_empty bad_thesis.archivematica_payloads + assert_empty good_thesis.archivematica_payloads + assert_empty another_good_thesis.archivematica_payloads + + PreservationSubmissionJob.perform_now([good_thesis, bad_thesis, another_good_thesis]) + + # first thesis should succeed and have a payload + assert_equal 1, good_thesis.archivematica_payloads.count + + # second thesis should fail and not have a payload + assert_empty bad_thesis.archivematica_payloads + + # third thesis should succeed and have a payload, despite prior failure + assert_equal 1, another_good_thesis.archivematica_payloads.count end end diff --git a/test/models/archivematica_payload_test.rb b/test/models/archivematica_payload_test.rb new file mode 100644 index 00000000..6de45836 --- /dev/null +++ b/test/models/archivematica_payload_test.rb @@ -0,0 +1,63 @@ +# == Schema Information +# +# Table name: archivematica_payloads +# +# id :integer not null, primary key +# preservation_status :integer default("unpreserved"), not null +# payload_json :text +# preserved_at :datetime +# thesis_id :integer not null +# created_at :datetime not null +# updated_at :datetime not null +# +require 'test_helper' + +class ArchivematicaPayloadTest < ActiveSupport::TestCase + def setup + @thesis = theses(:published) + @payload = @thesis.archivematica_payloads.create! + end + + test 'new payload has default preservation_status of unpreserved' do + assert_equal 'unpreserved', @payload.preservation_status + end + + test 'payload_json exists and contains valid JSON' do + assert @payload.payload_json.present? 
+ + json = @payload.payload_json + assert json.is_a?(String) # Payload JSON should be a string + + parsed = JSON.parse(json) + assert parsed.is_a?(Hash) # Parsed JSON should be a valid hash + end + + test 'payload_json contains expected structure' do + parsed = JSON.parse(@payload.payload_json) + assert_equal parsed['action'], 'create-bagit-zip' + assert parsed['challenge_secret'] + assert_not_nil parsed['verbose'] + assert parsed['input_files'].is_a?(Array) + assert_equal ['md5'], parsed['checksums_to_generate'] + assert parsed['output_zip_s3_uri'].end_with?('.zip') + assert parsed['compress_zip'].in?([true, false]) + # Check S3 URI format + parsed['input_files'].each do |file| + assert_match %r{^s3://#{ENV.fetch('AWS_S3_BUCKET', nil)}/}, file['uri'] + end + end + + test 'input_files includes thesis files and metadata_csv' do + parsed = JSON.parse(@payload.payload_json) + thesis_files = @thesis.files.count + assert_equal thesis_files + 1, parsed['input_files'].size + assert_equal @thesis.files.first.filename.to_s, parsed['input_files'].first['filepath'] + assert_equal 'metadata/metadata.csv', parsed['input_files'].last['filepath'] + end + + test 'metadata_csv is attached and has correct filename/content type' do + assert @payload.metadata_csv.attached? 
+ assert_equal 'metadata.csv', @payload.metadata_csv.filename.to_s + assert_equal 'text/csv', @payload.metadata_csv.content_type + end +end diff --git a/test/models/submission_information_package_zipper_test.rb b/test/models/submission_information_package_zipper_test.rb deleted file mode 100644 index 2857178f..00000000 --- a/test/models/submission_information_package_zipper_test.rb +++ /dev/null @@ -1,50 +0,0 @@ -require 'test_helper' - -class SubmissionInformationPackageZipperTest < ActiveSupport::TestCase - - # because we need to actually use the file it's easier to attach it in the test rather - # than use our fixtures as the fixtures oddly don't account for the file actually being - # where ActiveStorage expects them to be. We also need this to be a record that looks like - # a published record so we'll use the published fixture, remove the fixtured files, and attach - # one again. - def setup_thesis - thesis = theses(:published) - thesis.files = [] - thesis.save - file = Rails.root.join('test', 'fixtures', 'files', 'registrar_data_small_sample.csv') - thesis.files.attach(io: File.open(file), filename: 'registrar_data_small_sample.csv') - thesis - end - - test 'sip has an attached zipped bag' do - thesis = setup_thesis - sip = thesis.submission_information_packages.create - SubmissionInformationPackageZipper.new(sip) - - assert_equal("application/zip", thesis.submission_information_packages.first.bag.blob.content_type) - end - - # Failure to properly close the handles can allow the zip creation to appear correct but actually be invalid - # this is a regression test to ensure we avoid that situation - # You can see this test fail by removing the `tmpfile.close` from the end of bagamatic. 
- test 'zip is valid' do - thesis = setup_thesis - sip = thesis.submission_information_packages.create - SubmissionInformationPackageZipper.new(sip) - - blob = thesis.submission_information_packages.last.bag.blob - Zip::File.open(blob.service.send(:path_for, blob.key)) do |zipfile| - assert_nil(zipfile.find_entry("file_not_in_zipfile.txt")) - assert_equal('manifest-md5.txt', zipfile.find_entry("manifest-md5.txt").to_s) - end - end - - test 'sip has the correct key' do - thesis = setup_thesis - sip = thesis.submission_information_packages.create - SubmissionInformationPackageZipper.new(sip) - - blob = thesis.submission_information_packages.last.bag.blob - assert blob.key.starts_with? "etdsip/#{thesis.graduation_year}/#{thesis.graduation_month}-#{thesis.accession_number}" - end -end