Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 46 additions & 13 deletions code/common/compress.q
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,30 @@ showcomp:{[hdbpath;csvpath;maxage]

compressfromtable:{[table]
    / Run the compression job described by table, one row per file.
    / The visible code reads these columns from table: fullpath, calgo, cblocksize, clevel, currentsize.
    / Side effect: resets the global statstab and then appends one row per file whose compressed length changed.
    / Returns the list of per-row results from statstabupdate.
    statstab::([] file:`$(); algo:`int$(); compressedLength:`long$();uncompressedLength:`long$());
    / Record the compressed length reported by -21! for every file before anything is touched,
    / so the post-run value can be compared against it
    table:update compressionvaluepre:{(-21!x)`compressedLength}'[fullpath] from table;
    / The \s setting is 0 when no secondary threads are configured: compress sequentially in that case,
    / otherwise compress in parallel and tidy up afterwards
    $[0=system"s";
        singlethreadcompress[table];
        multithreadcompress[table]];
    / Update the stats table after the compression, in the main thread
    {statstabupdate[x`fullpath;x`calgo;x`currentsize;x`compressionvaluepre]} each table}

statstabupdate:{[file;algo;sizeuncomp;compressionvaluepre]
    / Append a row to the global statstab when the compressed length of file differs from
    / the value captured before the run (compressionvaluepre); otherwise do nothing.
    / file                - file handle passed to -21!
    / algo                - compression algorithm; 0 selects the decompression branch
    / sizeuncomp          - value stored in the uncompressedLength column
    / compressionvaluepre - compressedLength reported by -21! before the run
    postlen:(-21!file)`compressedLength;
    / when decompressing (algo 0) the pre-run length is logged, otherwise the new on-disk length
    logged:$[0=algo;compressionvaluepre;postlen];
    if[not compressionvaluepre~postlen;
        statstab,:(file;algo;logged;sizeuncomp)]}

singlethreadcompress:{[table]
    / Sequential path: for each row of table, compress the file and then immediately
    / run cleancompressed on it before moving to the next row.
    / Reads columns fullpath, calgo, cblocksize, clevel and currentsize.
    .lg.o[`compression;"Single threaded process, compress applied sequentially"];
    {[row]
        compress . row`fullpath`calgo`cblocksize`clevel`currentsize;
        cleancompressed . row`fullpath`calgo} each table;
    }

multithreadcompress:{[table]
    / Parallel path: run compress over every row with peach, then run cleancompressed
    / over every row with each once all compress calls have returned.
    / Reads columns fullpath, calgo, cblocksize, clevel and currentsize.
    .lg.o[`compression;"Multithreaded process, compress applied in parallel "];
    / stage 1 - compress in parallel
    {[row] compress . row`fullpath`calgo`cblocksize`clevel`currentsize} peach table;
    / stage 2 - verify and swap files into place sequentially
    {[row] cleancompressed . row`fullpath`calgo} each table;
    }

/- call the compression with a max age parameter implemented
compressmaxage:{[hdbpath;csvpath;maxage]
Expand Down Expand Up @@ -160,23 +183,33 @@ compress:{[filetoCompress;algo;blocksize;level;sizeuncomp]
$[((0 = count -21!filetoCompress) & not 0 = algo)|((not 0 = count -21!filetoCompress) & 0 = algo);
[.lg.o[`compression;cmp,"compressing ","file ", (string filetoCompress), " with algo: ", (string algo), ", blocksize: ", (string blocksize), ", and level: ", (string level), "."];
/ perform the compression/decompression
if[0=algo;comprL:(-21!filetoCompress)`compressedLength];
-19!(filetoCompress;compressedFile;blocksize;algo;level);
/ check the compressed/decomp file and move if appropriate; else delete compressed file and log error
$[((get compressedFile)~sf:get filetoCompress) & (count -21!compressedFile) or algo=0;
[.lg.o[`compression;"File ",cmp,"compressed ","successfully; matches orginal. Deleting original."];
system "r ", (last ":" vs string compressedFile)," ", last ":" vs string filetoCompress;
/ move the hash files too.
hashfilecheck[compressedFile;filetoCompress;sf];
/-log to the table if the algo wasn't 0
statstab,:$[not 0=algo;(filetoCompress;algo;(-21!filetoCompress)`compressedLength;sizeuncomp);(filetoCompress;algo;comprL;sizeuncomp)]];
[$[not count -21!compressedFile;
[.lg.o[`compression; "Failed to compress file ",string[filetoCompress]];hdel compressedFile];
[.lg.o[`compression;cmp,"compressed ","file ",string[compressedFile]," doesn't match original. Deleting new file"];hdel compressedFile]]]]
];
/ if already compressed/decompressed, then log that and skip.
.lg.o[`compression; "file ", (string filetoCompress), " is already ",cmp,"compressed",". Skipping this file"]]}

cleancompressed:{[filetoCompress;algo]
    / Verify the temporary <file>_kdbtempzip output for filetoCompress and, when it is good,
    / rename it over the original and call hashfilecheck.
    / filetoCompress - file handle of the original file
    / algo           - compression algorithm; 0 means the run was a decompression
    / Returns () on either early exit, generic null otherwise.
    tmpfile:hsym `$string[filetoCompress],"_kdbtempzip";
    prefix:$[algo=0;"de";""];
    / Guard 1: key returns () when the temporary file does not exist
    if[()~key tmpfile;
        .lg.o[`compression;"No compressed file present for the following file - ",string filetoCompress];
        :()];
    / Guard 2: contents must match the original, and the temporary file must report
    / compression stats via -21! unless this was a decompression
    statsok:(count -21!tmpfile)|algo=0;
    original:get filetoCompress;
    verified:(get[tmpfile]~original)&statsok;
    if[not verified;
        .lg.o[`compression;prefix,"compressed file ",string[tmpfile]," doesn't match original. Deleting new file"];
        hdel tmpfile;
        :()];
    / Both checks passed: rename the temporary file onto the original name
    .lg.o[`compression;"File ",prefix,"compressed ",string[filetoCompress]," successfully; matches orginal. Deleting original."];
    stripcolon:{last ":" vs string x};
    system "r ",stripcolon[tmpfile]," ",stripcolon filetoCompress;
    / move the hash files too
    hashfilecheck[tmpfile;filetoCompress;original];
    }


hashfilecheck:{[compressedFile;filetoCompress;sf]
/ if running 3.6 or higher, account for anymap type for nested lists
/ check for double hash file if nested data contains symbol vector/atom
Expand Down