diff --git a/bagit.py b/bagit.py index 93d5ac9..e98feb3 100755 --- a/bagit.py +++ b/bagit.py @@ -146,7 +146,9 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None): bag_info['Payload-Oxum'] = Oxum _make_tag_file('bag-info.txt', bag_info) - _make_tagmanifest_file('tagmanifest-md5.txt', bag_dir) + for c in checksum: + _make_tagmanifest_file(c, bag_dir) + except Exception as e: os.chdir(old_dir) @@ -311,7 +313,8 @@ def save(self, processes=1, manifests=False): _make_tag_file(self.tag_file_name, self.info) # Update tag-manifest for changes to manifest & bag-info files - _make_tagmanifest_file('tagmanifest-md5.txt', self.path) + for alg in self.algs: + _make_tagmanifest_file(alg, self.path) # Reload the manifests self._load_manifests() @@ -759,14 +762,16 @@ def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'): return "%s.%s" % (total_bytes, num_files) -def _make_tagmanifest_file(tagmanifest_file, bag_dir): +def _make_tagmanifest_file(alg, bag_dir): + tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + logging.info("writing %s", tagmanifest_file) files = [f for f in listdir(bag_dir) if isfile(join(bag_dir, f))] checksums = [] for f in files: - if f == tagmanifest_file: + if re.match('^tagmanifest-.+\.txt$', f): continue fh = open(join(bag_dir, f), 'rb') - m = hashlib.md5() + m = _hasher(alg) while True: bytes = fh.read(16384) if not bytes: @@ -830,8 +835,7 @@ def _manifest_line_sha256(filename): def _manifest_line_sha512(filename): return _manifest_line(filename, 'sha512') -def _manifest_line(filename, algorithm='md5'): - fh = open(filename, 'rb') +def _hasher(algorithm='md5'): if algorithm == 'md5': m = hashlib.md5() elif algorithm == 'sha1': @@ -840,6 +844,11 @@ def _manifest_line(filename, algorithm='md5'): m = hashlib.sha256() elif algorithm == 'sha512': m = hashlib.sha512() + return m + +def _manifest_line(filename, algorithm='md5'): + fh = open(filename, 'rb') + m = _hasher(algorithm) total_bytes = 0 while True: diff --git a/test.py b/test.py index 7ab5863..80a139d 100644 --- a/test.py +++ b/test.py @@ -269,6 +269,11 @@ def test_validate_optional_tagfile(self): bag = bagit.Bag(self.tmpdir) self.assertRaises(bagit.BagValidationError, self.validate, bag) + def test_sha1_tagfile(self): + bag = bagit.make_bag(self.tmpdir, checksum=['sha1']) + self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-sha1.txt'))) + self.assertEqual(bag.entries['bag-info.txt']['sha1'], 'b537642e07abc0c22c428aee65180e97f78e61dc') + def test_validate_unreadable_file(self): bag = bagit.make_bag(self.tmpdir, checksum=["md5"]) os.chmod(j(self.tmpdir, "data/loc/2478433644_2839c5e8b8_o_d.jpg"), 0) @@ -493,6 +498,17 @@ def test_save_baginfo(self): self.assertEqual(b.info["x"], ["a", "b", "c"]) self.assertTrue(bag.is_valid()) + def test_save_baginfo_with_sha1(self): + bag = bagit.make_bag(self.tmpdir, checksum=["sha1", "md5"]) + self.assertTrue(bag.is_valid()) + bag.save() + + bag.info['foo'] = "bar" + bag.save() + + bag = bagit.Bag(self.tmpdir) + self.assertTrue(bag.is_valid()) + def test_save_only_baginfo(self): bag = bagit.make_bag(self.tmpdir) with open(j(self.tmpdir, 'data', 'newfile'), 'w') as nf: @@ -504,7 +520,6 @@ def test_save_only_baginfo(self): self.assertEqual(bag.info["foo"], "bar") self.assertFalse(bag.is_valid()) - def test_make_bag_with_newline(self): bag = bagit.make_bag(self.tmpdir, {"test": "foo\nbar"}) self.assertEqual(bag.info["test"], "foobar")