#!/usr/bin/python3
#
# Provenance (from the patch header this script was extracted from):
#   From: Jonathan Dieter
#   Date: Thu, 9 Aug 2018 11:42:24 +0000 (+0200)
#   Subject: Add contrib program to generate dictionary
#   X-Git-Tag: archive/raspbian/1.1.9+ds1-1+rpi1~1^2~132
#   X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=3a81d00c314c17cea05d773931c9053848135f28;p=zchunk.git
#   Signed-off-by: Jonathan Dieter
#   Target path: contrib/gen_xml_dictionary (new file, mode 100755)
"""Create a zstd dictionary from one or more files that will be chunked.

Each input file is read as text, every run of 32 or more lowercase hex
digits is removed, and the remainder is split on a user-supplied separator
string.  Every piece is written to its own sample file in a temporary
directory, and ``zstd --train`` is run over those samples to produce
``<name>.dict`` in the current directory, where ``<name>`` is derived from
the first input file.

Usage:
    gen_xml_dictionary [-s SIZE] split_string file [file ...]
"""

import re
import sys
import os.path
import os
import subprocess
import argparse
import shutil
import tempfile

# Match any series of lowercase hex digits that is 32 characters or longer
CHECKSUM_REGEX = re.compile(r"[0-9a-f]{32,}")

# Default value passed to zstd's --maxdict option.
DEFAULT_DICT_SIZE = 112640


def build_parser():
    """Return the argument parser describing the command-line interface."""
    parser = argparse.ArgumentParser(description="Creates a zstd dictionary from a file that will be chunked")
    parser.add_argument("split_string", help="String to use to split the file(s)")
    parser.add_argument("file", nargs="+", help="File(s) to use to generate the dictionary")
    parser.add_argument("-s", "--size", action="store", type=int, default=DEFAULT_DICT_SIZE, help="Dictionary size")
    return parser


def dictionary_name(path):
    """Return the output dictionary filename for the input file *path*.

    The name is the basename of *path* up to (not including) its first
    ".", followed by ".dict".  When that stem is empty (the basename starts
    with a "."), the whole basename is used instead so the result is not
    just ".dict".
    """
    base = os.path.basename(path)
    stem = base.split(".")[0]
    return (stem or base) + ".dict"


def strip_checksums(text):
    """Return *text* with every run of 32+ lowercase hex digits removed."""
    return CHECKSUM_REGEX.sub("", text)


def write_chunks(paths, split_string, out_dir):
    """Split each file in *paths* and write the pieces into *out_dir*.

    Every file is read as text, passed through ``strip_checksums`` and split
    on *split_string*.  Each piece is written to its own file, prefixed with
    *split_string* (this includes the piece that precedes the first
    separator).

    Sample files are named ``<input index>.<input basename>.<piece index>``;
    the input index keeps two inputs that share a basename from overwriting
    each other's samples.

    Args:
        paths: Input file paths, processed in order.
        split_string: Non-empty separator used to split the file contents.
        out_dir: Existing directory that receives the sample files.

    Returns:
        The paths of the sample files written, in creation order.

    Raises:
        OSError: An input file cannot be read or a sample cannot be written.
        ValueError: *split_string* is empty.
    """
    written = []
    for file_index, path in enumerate(paths):
        with open(path, "r") as src:
            text = strip_checksums(src.read())
        base = os.path.basename(path)
        for chunk_index, chunk in enumerate(text.split(split_string)):
            chunk_path = os.path.join(
                out_dir, "%04i.%s.%06i" % (file_index, base, chunk_index)
            )
            with open(chunk_path, "w") as dst:
                dst.write(split_string)
                dst.write(chunk)
            written.append(chunk_path)
    return written


def main(argv=None):
    """Run the dictionary generator and return a process exit status.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 on success, 1 if an input could not be processed, ``zstd`` could
        not be started, or ``zstd`` exited with a non-zero status.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if not args.split_string:
        # str.split("") raises ValueError; reject it with a usage error.
        parser.error("split_string must not be empty")

    dict_file = dictionary_name(args.file[0])
    temp_dir = tempfile.mkdtemp()
    try:
        samples = write_chunks(args.file, args.split_string, temp_dir)
        run_cmd = ["zstd", "--train"] + samples + ["-o", dict_file, "--maxdict=%i" % args.size]
        # check=True is what makes a failing zstd raise CalledProcessError.
        subprocess.run(run_cmd, check=True)
    except subprocess.CalledProcessError:
        return 1
    except OSError as err:
        # Unreadable input, unwritable sample, or the zstd binary is missing.
        print("gen_xml_dictionary: %s" % err, file=sys.stderr)
        return 1
    finally:
        # Always remove the samples, whichever way the block above exits.
        shutil.rmtree(temp_dir, ignore_errors=True)
    return 0


if __name__ == "__main__":
    sys.exit(main())