From 00e0854157a462af94b5dc7971d285bccd496761 Mon Sep 17 00:00:00 2001
From: Peter Pentchev
Date: Sun, 31 May 2020 00:43:02 +0300
Subject: [PATCH] Add a "compile a program" autopkgtest, fix -dev deps.

---
 debian/control       |   2 +-
 debian/tests/chunk.c | 119 +++++++++++++
 debian/tests/compile | 406 +++++++++++++++++++++++++++++++++++++++++++
 debian/tests/control |   4 +
 4 files changed, 530 insertions(+), 1 deletion(-)
 create mode 100644 debian/tests/chunk.c
 create mode 100755 debian/tests/compile

diff --git a/debian/control b/debian/control
index 83df0c7..f7dceb6 100644
--- a/debian/control
+++ b/debian/control
@@ -52,7 +52,7 @@ Package: libzck-dev
 Section: libdevel
 Architecture: any
 Multi-Arch: same
-Depends: ${misc:Depends}, libzck1 (= ${binary:Version})
+Depends: ${misc:Depends}, libzck1 (= ${binary:Version}), libzstd-dev
 Description: compress a file into independent chunks - development files
  zchunk is a compressed file format that splits the file into independent
  chunks. This allows one to only download changed chunks when
diff --git a/debian/tests/chunk.c b/debian/tests/chunk.c
new file mode 100644
index 0000000..4201ced
--- /dev/null
+++ b/debian/tests/chunk.c
@@ -0,0 +1,119 @@
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <zck.h>
+
+/* A chunk and its size as reported by zck_get_chunk_size(). */
+struct chunk_stuff {
+    zckChunk *chunk;
+    size_t size;
+};
+
+/*
+ * Walk the chunk list and return the first non-empty chunk that is preceded
+ * by at least one other non-empty chunk.  Exit with an error message if a
+ * libzck call fails or no such chunk is found among the first chunk_count.
+ */
+static struct chunk_stuff
+get_second_chunk(struct zckCtx * const zck, const size_t chunk_count)
+{
+    zckChunk *chunk = zck_get_first_chunk(zck);
+    if (chunk == NULL)
+        errx(1, "zck_get_first_chunk() failed: %s", zck_get_error(zck));
+    printf("got first chunk %p\n", (void *)chunk);
+
+    size_t start = 0;
+    for (size_t idx = 0; idx < chunk_count; idx++) {
+        const ssize_t s_size = zck_get_chunk_size(chunk);
+        if (s_size < 0)
+            errx(1, "zck_get_chunk_size() returned invalid size %zd: %s", s_size, zck_get_error(zck));
+        const size_t size = (size_t)s_size;
+        printf("chunk %zu: start %zu size %zu\n", idx, start, size);
+
+        if (size > 0 && start > 0) {
+            printf("got it!\n");
+            return (struct chunk_stuff){
+                .chunk = chunk,
+                .size = size,
+            };
+        }
+        start += size;
+
+        chunk = zck_get_next_chunk(chunk);
+        if (chunk == NULL)
+            errx(1, "get_next_chunk() failed for %zu: %s", idx + 1, zck_get_error(zck));
+    }
+    errx(1, "Could not find the second chunk!");
+}
+
+/* Read a .zck file, extract its second chunk, and write it to a new file. */
+int main(const int argc, char * const argv[])
+{
+    if (argc != 3)
+        errx(1, "Usage: chunk /path/to/file.zck /path/to/chunk.txt");
+
+    const char * const src_name = argv[1];
+    const int src_fd = open(src_name, O_RDONLY);
+    if (src_fd == -1)
+        err(1, "Could not open %s", src_name);
+
+    struct zckCtx *zck = zck_create();
+    if (zck == NULL)
+        err(1, "zck_create() failed");
+    printf("got zck context %p\n", (void *)zck);
+    if (!zck_init_read(zck, src_fd))
+        err(1, "zck_init_read() failed");
+
+    const ssize_t header_len = zck_get_header_length(zck);
+    if (header_len < 1)
+        errx(1, "Invalid header length %zd", header_len);
+    printf("header length %zd\n", header_len);
+    const ssize_t s_chunk_count = zck_get_chunk_count(zck);
+    if (s_chunk_count < 1)
+        errx(1, "Invalid chunk count %zd", s_chunk_count);
+    const size_t chunk_count = (size_t)s_chunk_count;
+    printf("chunk count %zu\n", chunk_count);
+
+    const struct chunk_stuff second = get_second_chunk(zck, chunk_count);
+    printf("got second chunk %p size %zu\n", (void *)second.chunk, second.size);
+    char * const data = malloc(second.size);
+    if (data == NULL)
+        err(1, "Could not allocate %zu bytes", second.size);
+    const ssize_t nread = zck_get_chunk_data(second.chunk, data, second.size);
+    if (nread != (ssize_t)second.size)
+        errx(1, "zck_get_chunk_data() returned %zd: %s", nread, zck_get_error(zck));
+    /* Show at most three bytes; the cast avoids sign-extending high bytes. */
+    printf("got the data:");
+    for (size_t i = 0; i < second.size && i < 3; i++)
+        printf(" %02x", (unsigned char)data[i]);
+    printf("\n");
+
+    zck_free(&zck);
+    if (zck != NULL)
+        errx(1, "zck_free() did not zero the pointer");
+    if (close(src_fd) == -1)
+        err(1, "Could not close %s after reading", src_name);
+
+    const char * const dst_name = argv[2];
+    printf("About to write %zu bytes to %s\n", second.size, dst_name);
+    const int dst_fd = open(dst_name, O_WRONLY | O_CREAT, 0644);
+    if (dst_fd == -1)
+        err(1, "Could not open %s for writing", dst_name);
+
+    size_t nwritten = 0;
+    while (nwritten < second.size) {
+        printf("- %zu bytes left to write\n", second.size - nwritten);
+        const ssize_t n = write(dst_fd, data + nwritten, second.size - nwritten);
+        if (n < 1)
+            err(1, "Could not write to %s", dst_name);
+        printf("- wrote %zd bytes\n", n);
+        nwritten += n;
+    }
+    if (close(dst_fd) == -1)
+        err(1, "Could not close %s after writing", dst_name);
+    printf("Whee!\n");
+    return 0;
+}
diff --git a/debian/tests/compile b/debian/tests/compile
new file mode 100755
index 0000000..7986557
--- /dev/null
+++ b/debian/tests/compile
@@ -0,0 +1,406 @@
+#!/usr/bin/python3
+"""Compile a test program."""
+
+import argparse
+import dataclasses
+import os
+import pathlib
+import re
+import subprocess
+import sys
+import tempfile
+
+from typing import Callable, Dict, List
+
+
+MAGIC = bytes([0, ord("Z"), ord("C"), ord("K"), ord("1")])
+
+RE_DATA_SIZE = re.compile(
+    r""" ^
+    Data \s+ size \s* : \s*
+    (?P<size> 0 | [1-9][0-9]* )
+    \s*
+    $ """,
+    re.X,
+)
+
+RE_CHUNK_COUNT = re.compile(
+    r""" ^
+    Chunk \s+ count \s* : \s*
+    (?P<count> 0 | [1-9][0-9]* )
+    \s*
+    $ """,
+    re.X,
+)
+
+RE_CHUNKS = re.compile(
+    r""" ^
+    \s+
+    Chunk \s+
+    Checksum \s+
+    Start \s+
+    Comp \s size \s+
+    Size \s*
+    $ """,
+    re.X,
+)
+
+RE_CHUNK = re.compile(
+    r""" ^
+    \s+
+    (?P<idx> 0 | [1-9][0-9]* ) \s+
+    (?P<cksum> \S+ ) \s+
+    (?P<start> 0 | [1-9][0-9]* ) \s+
+    (?P<comp_size> 0 | [1-9][0-9]* ) \s+
+    (?P<size> 0 | [1-9][0-9]* ) \s*
+    $ """,
+    re.X,
+)
+
+
+@dataclasses.dataclass(frozen=True)
+class Config:
+    """Runtime configuration."""
+
+    # pylint: disable=too-many-instance-attributes
+
+    tempd: pathlib.Path
+    source: pathlib.Path
+    obj: pathlib.Path
+    program: pathlib.Path
+    env: Dict[str, str]
+
+    orig: pathlib.Path
+    compressed: pathlib.Path
+    uncompressed: pathlib.Path
+
+
+@dataclasses.dataclass(frozen=True)
+class Chunk:
+    """A single chunk descriptor."""
+
+    cstart: int  # offset of the chunk within the compressed file
+    start: int  # offset of the chunk's data within the original file
+    csize: int  # compressed size of the chunk
+    size: int  # uncompressed size of the chunk
+    cend: int  # offset just past the chunk within the compressed file
+    end: int  # offset just past the chunk's data within the original file
+
+
+def get_runenv() -> Dict[str, str]:
+    """Set up the environment for running the zchunk programs."""
+    env = dict(os.environ)
+    env["LC_ALL"] = "C.UTF-8"
+    env["LANGUAGE"] = ""
+    return env
+
+
+def parse_args(dirname: str) -> Config:
+    """Parse the command-line arguments, deduce some things."""
+    parser = argparse.ArgumentParser(prog="compile")
+    parser.add_argument(
+        "source", type=str, help="path to the test program source file",
+    )
+    parser.add_argument(
+        "filename", type=str, help="path to the filename to compress"
+    )
+
+    args = parser.parse_args()
+
+    tempd = pathlib.Path(dirname).absolute()
+    return Config(
+        tempd=tempd,
+        source=pathlib.Path(args.source),
+        obj=tempd / "chunk.o",
+        program=tempd / "chunk",
+        env=get_runenv(),
+        orig=pathlib.Path(args.filename).absolute(),
+        compressed=tempd / "words.txt.zck",
+        uncompressed=tempd / "chunk.txt",
+    )
+
+
+def do_compile(cfg: Config) -> None:
+    """Compile the test program."""
+    print("Fetching the C compiler flags for zck")
+    cflags = (
+        subprocess.check_output(
+            ["pkg-config", "--cflags", "zck"], shell=False, env=cfg.env
+        )
+        .decode("UTF-8")
+        .rstrip("\r\n")
+    )
+    if "\r" in cflags or "\n" in cflags:
+        sys.exit(f"`pkg-config --cflags zck` returned {cflags!r}")
+
+    if cfg.obj.exists():
+        sys.exit(f"Did not expect {cfg.obj} to exist")
+    # NOTE(review): the flags go through the shell so that it can split the
+    # pkg-config output; a path containing a single quote breaks the quoting.
+    cmd = f"cc -c -o '{cfg.obj}' {cflags} '{cfg.source}'"
+    print(f"Running {cmd!r}")
+    subprocess.check_call(cmd, shell=True, env=cfg.env)
+    if not cfg.obj.is_file():
+        sys.exit(f"{cmd!r} did not create the {cfg.obj} file")
+
+    print("Fetching the C linker flags and libraries for zck")
+    libs = (
+        subprocess.check_output(
+            ["pkg-config", "--libs", "zck"], shell=False, env=cfg.env
+        )
+        .decode("UTF-8")
+        .rstrip("\r\n")
+    )
+    if "\r" in libs or "\n" in libs:
+        sys.exit(f"`pkg-config --libs zck` returned {libs!r}")
+
+    if cfg.program.exists():
+        sys.exit(f"Did not expect {cfg.program} to exist")
+    cmd = f"cc -o '{cfg.program}' '{cfg.obj}' {libs}"
+    print(f"Running {cmd!r}")
+    subprocess.check_call(cmd, shell=True, env=cfg.env)
+    if not cfg.program.is_file():
+        sys.exit(f"{cmd!r} did not create the {cfg.program} file")
+    if not os.access(cfg.program, os.X_OK):
+        sys.exit(f"Not an executable file: {cfg.program}")
+    print(f"Looks like we got {cfg.program}")
+
+
+def do_compress(cfg: Config, orig_size: int) -> int:
+    """Compress the original file."""
+    print(f"About to compress {cfg.orig} to {cfg.compressed}")
+    if cfg.compressed.exists():
+        sys.exit(f"Did not expect {cfg.compressed} to exist")
+    subprocess.check_call(
+        ["zck", "-o", cfg.compressed, "--", cfg.orig],
+        shell=False,
+        env=cfg.env,
+    )
+    if not cfg.compressed.is_file():
+        sys.exit(f"zck did not create the {cfg.compressed} file")
+    comp_size = cfg.compressed.stat().st_size
+    print(f"{cfg.compressed} size is {comp_size} bytes long")
+    if comp_size >= orig_size:
+        sys.exit(
+            f"sizeof({cfg.compressed}) == {comp_size} : "
+            f"sizeof({cfg.orig}) == {orig_size}"
+        )
+    with cfg.compressed.open(mode="rb") as compf:
+        start = compf.read(len(MAGIC))
+    print(f"{cfg.compressed} starts with {start!r}")
+    if start != MAGIC:
+        sys.exit(f"{cfg.compressed} does not start with {MAGIC!r}: {start!r}")
+
+    return comp_size
+
+
+def read_chunks(cfg: Config, orig_size: int, comp_size: int) -> Chunk:
+    """Parse the chunks of the compressed file.
+
+    Run `zck_read_header -c` and feed its output, line by line, through a
+    small state machine of handler functions that check each chunk's
+    offsets and sizes against orig_size and comp_size.  Return the first
+    chunk that does not start at offset 0 of the original data; exit with
+    a message if there is none.
+    """
+    # pylint: disable=too-many-statements
+    output = subprocess.check_output(
+        ["zck_read_header", "-c", "--", cfg.compressed],
+        shell=False,
+        env=cfg.env,
+    ).decode("UTF-8")
+
+    params: Dict[str, int] = {}
+    chunks: List[Chunk] = []
+
+    def ignore_till_end(line: str) -> str:
+        """Ignore anything until EOF."""
+        raise NotImplementedError(line)
+
+    def parse_chunk(line: str) -> str:
+        """Parse a single chunk line."""
+        # pylint: disable=too-many-branches
+        data = RE_CHUNK.match(line)
+        if not data:
+            sys.exit(f"Unexpected line for chunk {len(chunks)}: {line!r}")
+        idx = int(data.group("idx"))
+        start = int(data.group("start"))
+        csize = int(data.group("comp_size"))
+        size = int(data.group("size"))
+
+        if idx != len(chunks):
+            sys.exit(f"Expected index {len(chunks)}: {line!r}")
+        if chunks:
+            last_chunk = chunks[-1]
+            if start != last_chunk.cend:
+                sys.exit(f"Expected start {last_chunk.cend}: {line!r}")
+        else:
+            if start != params["size_diff"]:
+                sys.exit(f"Expected start {params['size_diff']}: {line!r}")
+            # First chunk: start from an empty placeholder that ends at
+            # comp_size minus the reported data size.
+            last_chunk = Chunk(
+                cstart=0,
+                start=0,
+                csize=0,
+                size=0,
+                cend=params["size_diff"],
+                end=0,
+            )
+
+        next_chunk = Chunk(
+            cstart=start,
+            start=last_chunk.end,
+            csize=csize,
+            size=size,
+            cend=last_chunk.cend + csize,
+            end=last_chunk.end + size,
+        )
+        if next_chunk.cend > comp_size:
+            sys.exit(
+                f"Compressed size overflow: {next_chunk.cend} > {comp_size}"
+            )
+
+        more = idx + 1 != params["chunk_count"]
+        if more:
+            if next_chunk.end >= orig_size:
+                sys.exit(
+                    f"Original size overflow: "
+                    f"{next_chunk.end} >= {orig_size}"
+                )
+        else:
+            if next_chunk.cend != comp_size:
+                sys.exit(
+                    f"Compressed size mismatch: "
+                    f"{next_chunk.cend} != {comp_size}"
+                )
+            if next_chunk.end != orig_size:
+                sys.exit(
+                    f"Original size mismatch: "
+                    f"{next_chunk.end} != {orig_size}"
+                )
+
+        print(f"- appending {next_chunk!r}")
+        chunks.append(next_chunk)
+
+        if more:
+            return "parse_chunk"
+        return "ignore_till_end"
+
+    def wait_for_chunks(line: str) -> str:
+        """Wait for the 'Chunks:' line."""
+        if not RE_CHUNKS.match(line):
+            return "wait_for_chunks"
+
+        return "parse_chunk"
+
+    def wait_for_chunk_count(line: str) -> str:
+        """Wait for the 'chunk count' line."""
+        data = RE_CHUNK_COUNT.match(line)
+        if not data:
+            return "wait_for_chunk_count"
+        print(f"- got a chunk count: {data.groupdict()!r}")
+
+        count = int(data.group("count"))
+        if count < 1:
+            sys.exit(f"zck_read_header said chunk count {count}")
+        params["chunk_count"] = count
+
+        return "wait_for_chunks"
+
+    def wait_for_total_size(line: str) -> str:
+        """Wait for the 'data size' line."""
+        data = RE_DATA_SIZE.match(line)
+        if not data:
+            return "wait_for_total_size"
+        print(f"- got a size line: {data.groupdict()!r}")
+
+        size = int(data.group("size"))
+        if size < 1 or size > comp_size:
+            sys.exit(
+                f"zck_read_header said data size {size} (comp {comp_size})"
+            )
+        params["size_diff"] = comp_size - size
+
+        return "wait_for_chunk_count"
+
+    handlers: Dict[str, Callable[[str], str]] = {
+        func.__name__: func
+        for func in (
+            wait_for_total_size,
+            wait_for_chunk_count,
+            wait_for_chunks,
+            parse_chunk,
+            ignore_till_end,
+        )
+    }
+
+    handler: Callable[[str], str] = wait_for_total_size
+
+    for line in output.splitlines():
+        print(f"- read a line: {line}")
+        new_handler = handler(line)
+        assert new_handler in handlers, new_handler
+        handler = handlers[new_handler]
+
+    if handler != ignore_till_end:  # pylint: disable=comparison-with-callable
+        sys.exit(f"handler is {handler!r} instead of {ignore_till_end!r}")
+
+    # Now let's find the second chunk
+    second = next((chunk for chunk in chunks if chunk.start > 0), None)
+    if second is None:
+        sys.exit("Could not find a second chunk in the header listing")
+    return second
+
+
+def run_program(cfg: Config) -> None:
+    """Run the test program, hopefully generate the chunk file."""
+    print(f"About to run {cfg.program}")
+    if cfg.uncompressed.exists():
+        sys.exit(f"Did not expect {cfg.uncompressed} to exist")
+    subprocess.check_call(
+        [cfg.program, cfg.compressed, cfg.uncompressed],
+        shell=False,
+        env=cfg.env,
+    )
+    if not cfg.uncompressed.is_file():
+        sys.exit(f"{cfg.program} did not create the {cfg.uncompressed} file")
+
+
+def compare_chunk(cfg: Config, second: Chunk, orig_size: int) -> None:
+    """Read data from the input file and the chunk."""
+    # OK, let's load it all into memory, mmkay?
+    contents = cfg.orig.read_bytes()
+    if len(contents) != orig_size:
+        sys.exit(
+            f"Could not read {orig_size} bytes from {cfg.orig}, "
+            f"read {len(contents)}"
+        )
+    chunk = cfg.uncompressed.read_bytes()
+    if len(chunk) != second.size:
+        sys.exit(
+            f"Could not read {second.size} bytes from {cfg.uncompressed}, "
+            f"read {len(chunk)}"
+        )
+
+    if contents[second.start : second.start + second.size] != chunk:
+        sys.exit("Mismatch!")
+
+
+def main() -> None:
+    """Parse arguments, compile a program, compress a file, test it."""
+    with tempfile.TemporaryDirectory() as dirname:
+        print(f"Using temporary directory {dirname}")
+        cfg = parse_args(dirname)
+        do_compile(cfg)
+        orig_size = cfg.orig.stat().st_size
+        print(f"Original file size: {orig_size}")
+        comp_size = do_compress(cfg, orig_size)
+        second_chunk = read_chunks(cfg, orig_size, comp_size)
+        run_program(cfg)
+        compare_chunk(cfg, second_chunk, orig_size)
+        print("Seems fine!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/debian/tests/control b/debian/tests/control
index eb33897..aeaf048 100644
--- a/debian/tests/control
+++ b/debian/tests/control
@@ -1,3 +1,7 @@
 Test-Command: debian/tests/dictionary /usr/bin /usr/share/dict/american-english
 Depends: @, python3, wamerican
 Features: test-name=debian-dict
+
+Test-Command: debian/tests/compile debian/tests/chunk.c /usr/share/dict/american-english
+Depends: @, build-essential, pkg-config, python3, wamerican
+Features: test-name=debian-compile
-- 
2.30.2