Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
bag_info['Payload-Oxum'] = Oxum
_make_tag_file('bag-info.txt', bag_info)

_make_tagmanifest_file('tagmanifest-md5.txt', bag_dir)
for c in checksum:
_make_tagmanifest_file(c, bag_dir)


except Exception as e:
os.chdir(old_dir)
Expand Down Expand Up @@ -311,7 +313,8 @@ def save(self, processes=1, manifests=False):
_make_tag_file(self.tag_file_name, self.info)

# Update tag-manifest for changes to manifest & bag-info files
_make_tagmanifest_file('tagmanifest-md5.txt', self.path)
for alg in self.algs:
_make_tagmanifest_file(alg, self.path)

# Reload the manifests
self._load_manifests()
Expand Down Expand Up @@ -759,14 +762,16 @@ def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'):
return "%s.%s" % (total_bytes, num_files)


def _make_tagmanifest_file(tagmanifest_file, bag_dir):
def _make_tagmanifest_file(alg, bag_dir):
tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg)
logging.info("writing %s", tagmanifest_file)
files = [f for f in listdir(bag_dir) if isfile(join(bag_dir, f))]
checksums = []
for f in files:
if f == tagmanifest_file:
if re.match('^tagmanifest-.+\.txt$', f):
continue
fh = open(join(bag_dir, f), 'rb')
m = hashlib.md5()
m = _hasher(alg)
while True:
bytes = fh.read(16384)
if not bytes:
Expand Down Expand Up @@ -830,8 +835,7 @@ def _manifest_line_sha256(filename):
def _manifest_line_sha512(filename):
    """Return the manifest line for *filename* computed with SHA-512.

    Thin wrapper that delegates to ``_manifest_line`` with the algorithm
    fixed to ``'sha512'``.
    """
    return _manifest_line(filename, algorithm='sha512')

def _manifest_line(filename, algorithm='md5'):
fh = open(filename, 'rb')
def _hasher(algorithm='md5'):
if algorithm == 'md5':
m = hashlib.md5()
elif algorithm == 'sha1':
Expand All @@ -840,6 +844,11 @@ def _manifest_line(filename, algorithm='md5'):
m = hashlib.sha256()
elif algorithm == 'sha512':
m = hashlib.sha512()
return m

def _manifest_line(filename, algorithm='md5'):
fh = open(filename, 'rb')
m = _hasher(algorithm)

total_bytes = 0
while True:
Expand Down
17 changes: 16 additions & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ def test_validate_optional_tagfile(self):
bag = bagit.Bag(self.tmpdir)
self.assertRaises(bagit.BagValidationError, self.validate, bag)

def test_sha1_tagfile(self):
    """Bagging with ``checksum=['sha1']`` writes a sha1 tag manifest.

    Checks that ``tagmanifest-sha1.txt`` exists in the bag directory and
    that the sha1 entry recorded for ``bag-info.txt`` equals the expected
    fixed digest.
    """
    sha1_bag = bagit.make_bag(self.tmpdir, checksum=['sha1'])

    # The tag manifest file name is derived from the checksum algorithm.
    tagmanifest_path = j(self.tmpdir, 'tagmanifest-sha1.txt')
    self.assertTrue(os.path.isfile(tagmanifest_path))

    recorded_digest = sha1_bag.entries['bag-info.txt']['sha1']
    self.assertEqual(recorded_digest, 'b537642e07abc0c22c428aee65180e97f78e61dc')

def test_validate_unreadable_file(self):
bag = bagit.make_bag(self.tmpdir, checksum=["md5"])
os.chmod(j(self.tmpdir, "data/loc/2478433644_2839c5e8b8_o_d.jpg"), 0)
Expand Down Expand Up @@ -493,6 +498,17 @@ def test_save_baginfo(self):
self.assertEqual(b.info["x"], ["a", "b", "c"])
self.assertTrue(bag.is_valid())

def test_save_baginfo_with_sha1(self):
    """A bag made with sha1 and md5 checksums stays valid across saves.

    Saves once with no changes, then again after adding a ``bag-info``
    entry, and finally re-opens the bag from disk and validates it.
    """
    original = bagit.make_bag(self.tmpdir, checksum=["sha1", "md5"])
    self.assertTrue(original.is_valid())
    # First save happens before any modification to the bag.
    original.save()

    # Second save follows a change to the bag-info metadata.
    original.info['foo'] = "bar"
    original.save()

    # Re-open from disk so validation runs against what was written.
    reopened = bagit.Bag(self.tmpdir)
    self.assertTrue(reopened.is_valid())

def test_save_only_baginfo(self):
bag = bagit.make_bag(self.tmpdir)
with open(j(self.tmpdir, 'data', 'newfile'), 'w') as nf:
Expand All @@ -504,7 +520,6 @@ def test_save_only_baginfo(self):
self.assertEqual(bag.info["foo"], "bar")
self.assertFalse(bag.is_valid())


def test_make_bag_with_newline(self):
bag = bagit.make_bag(self.tmpdir, {"test": "foo\nbar"})
self.assertEqual(bag.info["test"], "foobar")
Expand Down