forked from NickSto/python-single
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patharchive-file.py
More file actions
executable file
·350 lines (311 loc) · 13.3 KB
/
archive-file.py
File metadata and controls
executable file
·350 lines (311 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
#!/usr/bin/env python3
import os
import sys
import time
import shutil
import logging
import argparse
import datetime
assert sys.version_info.major >= 3, 'Python 3 required'

# Version of the tracker file format this script reads and writes.
VERSION = 2.0
# The current unix timestamp, captured once at import time.
NOW = int(time.time())
# Archival periods: maps each period's name to its length in seconds.
PERIODS = {
  # 'minutely': 60,
  'hourly': 60*60,
  'daily': 24*60*60,
  'weekly': 7*24*60*60,
  'monthly':int(60*60*24*365.2425/12),  # average Gregorian month
  'yearly': int(60*60*24*365.2425),     # average Gregorian year
  'forever':NOW-1,  # a period nearly as long as the unix epoch itself, so its one
                    # slot covers (almost) any possible archive timestamp
}
DESCRIPTION = """Archive copies of the target file. Keep a set of copies from different time
periods, like the last hour, day, week, month, etc."""
def make_argparser():
  """Construct and return the ArgumentParser defining this script's command line."""
  parser = argparse.ArgumentParser(description=DESCRIPTION)
  parser.add_argument('file',
    help='The file to back up.')
  parser.add_argument('-d', '--destination',
    help='The directory the archive is/should be stored in. Default is the same directory the '
    'target file lives in.')
  # Path to the tracker file recording the existing archives; when omitted, main()
  # falls back to ".archive-tracker" inside the destination directory.
  parser.add_argument('-a', '--archive-tracker')
  parser.add_argument('-e', '--ext',
    help='The extension of the file. You can use this to make sure the names of the archive files '
    'are like "example-2017-03-23-121700.tar.gz" instead of '
    '"example.tar-2017-03-23-121700.gz".')
  parser.add_argument('-c', '--copies', type=int, default=2,
    help='How many copies to keep per time period. Default: %(default)s')
  parser.add_argument('-m', '--min-size', type=int,
    help='Minimum file size (in bytes). If the target file is smaller than this, do not copy it '
    'into the archive.')
  parser.add_argument('--now', type=int, default=NOW,
    help='The unix timestamp to use as "now". For debugging.')
  parser.add_argument('-l', '--log', type=argparse.FileType('w'), default=sys.stderr,
    help='Print log messages to this file instead of to stderr. Warning: Will overwrite the file.')
  # -q/-v/-D all store a logging level into args.volume; they are mutually exclusive,
  # and the default (no flag) is WARNING.
  volume = parser.add_mutually_exclusive_group()
  volume.add_argument('-q', '--quiet', dest='volume', action='store_const', const=logging.CRITICAL,
    default=logging.WARNING)
  volume.add_argument('-v', '--verbose', dest='volume', action='store_const', const=logging.INFO)
  volume.add_argument('-D', '--debug', dest='volume', action='store_const', const=logging.DEBUG)
  return parser
def main(argv):
  """Entry point: validate inputs, plan the archive update, copy/delete files, rewrite tracker."""
  parser = make_argparser()
  args = parser.parse_args(argv[1:])
  logging.basicConfig(stream=args.log, level=args.volume, format='%(message)s')
  tone_down_logger()
  # Check the input paths exist.
  if not os.path.isfile(args.file):
    fail('Error: Target file {!r} not found.'.format(args.file))
  if args.destination and not os.path.isdir(args.destination):
    fail('Error: --destination {!r} not found.'.format(args.destination))
  if args.min_size and os.path.getsize(args.file) < args.min_size:
    fail('Error: Target file {!r} smaller than --min-size ({} < {})'
         .format(args.file, os.path.getsize(args.file), args.min_size))
  filename = os.path.basename(args.file)
  destination = args.destination or os.path.dirname(args.file)
  archive_tracker = args.archive_tracker or os.path.join(destination, '.archive-tracker')
  # Read the tracker file, get the section on our target file.
  if os.path.isfile(archive_tracker):
    with open(archive_tracker) as tracker_file:
      tracker = read_tracker(tracker_file, periods=PERIODS, expected_version=VERSION)
  else:
    # No tracker yet: start with an empty section for this file.
    tracker = {filename:{}}
  try:
    tracker_section = tracker[filename]
  except KeyError:
    # The tracker exists but has no section for this filename.
    fail('Error: Target file "{}" not found in tracker {}.'.format(filename, archive_tracker))
  # Determine which actions are needed.
  new_tracker_section, wanted = get_plan(tracker_section, destination, args.copies, periods=PERIODS,
                                         now=args.now)
  # If new archives are needed, copy the target file to use as a new archive file, and update the
  # tracker with its path.
  if wanted:
    archive_file_path = get_archive_path(args.file, destination, args.ext, now=args.now)
    logging.info('Copying target file {} to {}'.format(args.file, archive_file_path))
    # copy2 also preserves file metadata (e.g. timestamps) on the archived copy.
    shutil.copy2(args.file, archive_file_path)
    add_new_file(new_tracker_section, wanted, archive_file_path, now=args.now)
  # Delete the now-unneeded archive files.
  files_to_delete = get_files_to_delete(tracker_section, new_tracker_section)
  delete_files(files_to_delete, destination)
  # Write the updated tracker file.
  tracker[filename] = new_tracker_section
  write_tracker(tracker, archive_tracker, periods=PERIODS, version=VERSION)
def read_tracker(tracker_file, periods=PERIODS, expected_version=VERSION):
  """Parse an open tracker file into a dict of per-target-file sections.

  Tracker file format (tab-delimited):
    >version=1.0
    filename.ext
    \tmonthly\t1\t1380426100\tfilename-2013-09-28.ext
    \tmonthly\t2\t1376366288\tfilename-2013-08-12.ext
    \tweekly\t2\t1380436173\tfilename-2013-09-29.ext
  Returned data structure:
    {
      'filename.ext': {
        'monthly': [
          {'timestamp':1380426100, 'file':'filename-2013-09-28.ext'},
          {'timestamp':1376366288, 'file':'filename-2013-08-12.ext'}
        ],
        'weekly': [
          None,
          {'timestamp':1380436173, 'file':'filename-2013-09-17.ext'}
        ]
      }
    }
  "filename.ext" begins one section, and there can be many sections in one file.
  Calls fail() (which raises/exits) on any malformed line or incompatible version.
  """
  version = None
  tracker = {}
  section = {}
  path = None
  for line_raw in tracker_file:
    # What kind of line is it? (Classify on the raw line, before stripping the
    # leading tab that marks data lines.)
    header = line_raw.startswith('>')
    section_header = not line_raw.startswith('\t')
    line = line_raw.strip()
    # Ignore empty lines.
    if not line:
      continue
    # Check version in header.
    if header:
      if line.startswith('>version='):
        version = float(line[9:])
        # Reject files newer than us, or more than one major version older.
        if version > expected_version or expected_version - version >= 1.0:
          fail('Error: tracker file is version {}, which is incompatible with the current version '
               '{}'.format(version, expected_version))
      continue
    # Start a new section.
    if section_header:
      if not version:
        # Data must not appear before a valid >version= header line.
        fail('Error: no version specified in tracker file.')
      if section and path:
        # Save the previous section before starting the next one.
        tracker[path] = section
      section = {}
      path = line
    else:
      # Parse a data line: period, copy number, timestamp, archive filename.
      fields = line.split('\t')
      if len(fields) == 4:
        period = fields[0].lower()
        copy = fields[1]
        timestamp = fields[2]
        filename = fields[3]
      else:
        fail('Error in tracker file. Wrong number of fields ({}) on line\n{}'
             .format(len(fields), line_raw))
      if period not in periods:
        fail('Error in tracker file. Invalid period "{}" on line\n{}'.format(period, line))
      try:
        timestamp = int(timestamp)
      except ValueError:
        fail('Error in tracker file. Invalid timestamp {!r} on line\n{}'.format(timestamp, line))
      try:
        copy = int(copy)
      except ValueError:
        fail('Error in tracker file. Invalid copy number {!r} on line\n{}'.format(copy, line))
      if copy > 2000:
        # Sanity cap: a huge copy number would pad the list below with thousands of Nones.
        fail('Error in tracker file. Copy too large ({}) on line\n{}'.format(copy, line))
      # Place the record for this line in the list for the period, at a location according to its
      # copy number. Missing intermediate copies are represented by None.
      copies = section.get(period, [])
      while len(copies) < copy:
        copies.append(None)
      copies[copy-1] = {'timestamp':timestamp, 'file':filename}
      section[period] = copies
  # Save the last section.
  if section and path:
    tracker[path] = section
  return tracker
def get_plan(tracker_section, destination, required_copies, periods=PERIODS, now=NOW):
  """Determine the changes needed to update the archives.
  Returns:
    new_tracker_section: An updated tracker section with copies shifted and copy lists extended
      where necessary. The original tracker section is not altered.
    wanted: Missing archives that need to be created. Each element is a dict with the keys 'period'
      and 'copy'."""
  new_tracker_section = {}
  wanted = []
  all_archives = []
  # Pool all existing archives (deduplicated), regardless of which period/copy slot
  # they currently occupy — a single archive file can serve several slots.
  for period in periods:
    for archive in tracker_section.get(period, []):
      if archive not in all_archives:
        all_archives.append(archive)
  for period in get_ordered_periods(periods):
    copies = []
    # Iterate through each time period, finding which archives are now within that period.
    # Choose one for each period (or None, if none exist).
    period_length = periods[period]
    for i, slot_start_age in enumerate(range(0, period_length*required_copies, period_length)):
      # Figure out the boundaries of this time slot.
      # Slot i (copy i+1) covers timestamps in (now - (i+1)*length, now - i*length].
      slot_end_age = slot_start_age + period_length
      slot_end = now - slot_start_age
      slot_start = now - slot_end_age
      candidates = []
      for archive in all_archives:
        # Check that the archive falls within the time period.
        if archive is not None and slot_start < archive['timestamp'] <= slot_end:
          # Check that the archive's file exists.
          path = os.path.join(destination, archive['file'])
          if os.path.isfile(path):
            candidates.append(archive)
          else:
            logging.warning('{} archive is missing (file {!r}).'.format(period, path))
      if candidates:
        # Choose the oldest archive, if there are multiple.
        candidates.sort(key=lambda archive: archive['timestamp'])
        # .copy() so new_tracker_section doesn't share dicts with tracker_section.
        copies.append(candidates[0].copy())
      else:
        logging.debug('No existing archive can serve as {} copy {}.'.format(period, i+1))
        if i+1 == 1:
          # Add it to the wanted list if it's copy 1.
          # If it's not copy 1, then making a new backup and calling it copy 2, for example, would
          # end up with a copy 2 younger than copy 1. Or, if we don't have a copy 1 already, we'll
          # be getting one shortly, which will end up being the same file as this copy 2 anyway.
          wanted.append({'period':period, 'copy':i+1})
        copies.append(None)
    new_tracker_section[period] = copies
  return new_tracker_section, wanted
def get_archive_path(target_path, destination, ext=None, now=None):
  """Build the path for a new archive copy of `target_path` inside `destination`.

  The archive filename is the target's base name with a timestamp inserted before the
  extension, e.g. "example-2017-03-23-121700.tar.gz".

  ext: the target's extension, with or without the leading dot (lets multi-part
    extensions like ".tar.gz" stay at the end of the name). If None, the extension is
    guessed with os.path.splitext, which only captures the last dotted component.
  now: unix timestamp rendered into the filename; defaults to the module-level NOW
    (late-bound so an explicit None behaves like omitting the argument).
  """
  if now is None:
    now = NOW
  filename = os.path.basename(target_path)
  if ext is None:
    base, ext = os.path.splitext(filename)
  else:
    if not ext.startswith('.'):
      ext = '.'+ext
    if filename.endswith(ext):
      base = filename[:-len(ext)]
    else:
      # Bug fix: `base` was previously left unassigned on this path, raising a
      # NameError whenever the given --ext didn't match the end of the filename.
      # Fall back to using the whole filename as the base.
      base = filename
  time_str = datetime.datetime.fromtimestamp(now).strftime('%Y-%m-%d-%H%M%S')
  archive_filename = base+'-'+time_str+ext
  return os.path.join(destination, archive_filename)
def add_new_file(tracker_section, wanted, archive_file_path, now=NOW):
  """Record a freshly created archive file in the tracker section (in place).

  The file is registered under every period/copy slot listed in `wanted`, using `now`
  as its timestamp. Copy lists are padded with None up to the required slot.
  """
  logging.info('Saving as '+', '.join(['{period} copy {copy}'.format(**w) for w in wanted]))
  filename = os.path.basename(archive_file_path)
  for entry in wanted:
    slot = entry['copy']
    copy_list = tracker_section.get(entry['period'], [])
    # Grow the list (with empty slots) until index slot-1 exists.
    copy_list.extend([None] * (slot - len(copy_list)))
    copy_list[slot-1] = {'timestamp':now, 'file':filename}
    tracker_section[entry['period']] = copy_list
def get_files_to_delete(tracker_section, new_tracker_section):
  """Return the archive filenames present in the old tracker section but absent from
  the new one — i.e. the files that are no longer needed."""
  before = get_files_in_tracker_section(tracker_section)
  after = get_files_in_tracker_section(new_tracker_section)
  return before.difference(after)
def get_files_in_tracker_section(tracker_section):
  """Collect, as a set, the filename of every archive recorded in a tracker section.

  Empty (None) copy slots are skipped."""
  return {archive['file']
          for copy_list in tracker_section.values()
          for archive in copy_list
          if archive is not None}
def delete_files(files_to_delete, destination):
  """Remove the given archive filenames from the destination directory.

  A filename that doesn't exist only triggers a warning; a failed os.remove() is
  fatal (reported via fail())."""
  if not files_to_delete:
    return
  logging.info('Deleting old archive files: "'+'", "'.join(files_to_delete)+'"')
  for filename in files_to_delete:
    path = os.path.join(destination, filename)
    if not os.path.isfile(path):
      logging.warning('Warning: Could not find file {!r}'.format(path))
      continue
    logging.debug('Deleting old archive file {!r}'.format(filename))
    try:
      os.remove(path)
    except OSError:
      fail('Error: Could not delete file {!r}.'.format(path))
def get_ordered_periods(periods=PERIODS):
  """Return the period names sorted from shortest to longest duration."""
  by_length = sorted(periods.items(), key=lambda item: item[1])
  return [name for name, length in by_length]
def write_tracker(tracker, tracker_path, periods=PERIODS, version=VERSION):
  """Serialize the tracker data structure to `tracker_path`, overwriting it.

  Output format matches what read_tracker() parses: a >version= header, then one
  section per target file, with one tab-prefixed line per recorded archive. Empty
  (None) copy slots are omitted. An unwritable path is fatal (via fail())."""
  ordered_periods = get_ordered_periods(periods)
  try:
    with open(tracker_path, 'w') as tracker_file:
      tracker_file.write('>version={}\n'.format(version))
      for path, section in tracker.items():
        tracker_file.write(path+'\n')
        for period in ordered_periods:
          for index, archive in enumerate(section.get(period, [])):
            if archive is None:
              continue
            line = '\t{}\t{}\t{timestamp}\t{file}\n'.format(period, index+1, **archive)
            tracker_file.write(line)
  except IOError:
    fail('Could not open file {!r}'.format(tracker_path))
def tone_down_logger():
  """Change the logging level names from all-caps to capitalized lowercase.
  E.g. "WARNING" -> "Warning" (turn down the volume a bit in your log files)"""
  levels = (logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG)
  for level in levels:
    logging.addLevelName(level, logging.getLevelName(level).capitalize())
def fail(message):
  """Log `message` at CRITICAL level and abort.

  When running as a script, exit with status 1; when this file is imported as a
  module, raise an Exception instead so the caller can handle it."""
  logging.critical(message)
  if __name__ != '__main__':
    raise Exception('Unrecoverable error')
  sys.exit(1)
if __name__ == '__main__':
  try:
    sys.exit(main(sys.argv))
  except BrokenPipeError:
    # Exit quietly if stdout's reader goes away early (e.g. piped into `head`).
    pass