From f1f8dd2c7c9d388006f38dc37defdf66613a8014 Mon Sep 17 00:00:00 2001
From: Antonio Valentino
Date: Sat, 16 Jun 2018 10:53:16 +0000
Subject: [PATCH] Import pytables_3.4.4.orig.tar.gz

[dgit import orig pytables_3.4.4.orig.tar.gz]
---
 .gitignore | 17 +
 .readthedocs.yml | 7 +
 .travis.yml | 36 +
 ANNOUNCE.txt.in | 72 +
 CITATION.bib | 6 +
 LICENSE.txt | 35 +
 LICENSES/BLOSC.txt | 23 +
 LICENSES/CLOUD-SPTHEME.txt | 47 +
 LICENSES/FASTLZ.txt | 24 +
 LICENSES/H5PY.txt | 34 +
 LICENSES/HDF5.txt | 69 +
 LICENSES/LZ4.txt | 32 +
 LICENSES/SNAPPY.txt | 28 +
 LICENSES/STDINT.txt | 26 +
 LICENSES/WIN32PTHREADS.txt | 19 +
 LICENSES/ZLIB.txt | 22 +
 LICENSES/ZSTD.TXT | 30 +
 MANIFEST.in | 28 +
 Makefile | 52 +
 README.rst | 152 +
 RELEASE_NOTES.txt | 112 +
 THANKS | 86 +
 VERSION | 1 +
 appveyor.yml | 77 +
 bench/LRU-experiments.py | 98 +
 bench/LRU-experiments2.py | 57 +
 bench/LRUcache-node-bench.py | 80 +
 bench/bench-postgres-ranges.sh | 14 +
 bench/bench-pytables-ranges.sh | 30 +
 bench/bench-pytables.sh | 28 +
 bench/blosc.py | 165 +
 bench/bsddb-table-bench.py | 262 +
 bench/cacheout.py | 13 +
 bench/chunkshape-bench.py | 61 +
 bench/chunkshape-testing.py | 108 +
 bench/collations.py | 124 +
 bench/copy-bench.py | 33 +
 bench/create-large-number-objects.py | 42 +
 bench/deep-tree-h5py.py | 119 +
 bench/deep-tree.py | 129 +
 bench/evaluate.py | 174 +
 bench/expression.py | 179 +
 bench/get-figures-ranges.py | 232 +
 bench/get-figures.py | 292 +
 bench/indexed_search.py | 462 ++
 bench/keysort.py | 33 +
 bench/lookup_bench.py | 241 +
 bench/open_close-bench.py | 236 +
 bench/opteron-stress-test.txt | 63 +
 bench/optimal-chunksize.py | 126 +
 bench/plot-bar.py | 119 +
 bench/plot-comparison-lzo-zlib-ucl.gnuplot | 27 +
 bench/plot-comparison-psyco-lzo.gnuplot | 28 +
 bench/poly.py | 196 +
 bench/postgres-search-bench.py | 248 +
 bench/postgres_backend.py | 156 +
 bench/pytables-search-bench.py | 221 +
 bench/pytables_backend.py | 191 +
 bench/recarray2-test.py | 106 +
 bench/search-bench-plot.py | 148 +
 bench/search-bench-rnd.sh | 122 +
 bench/search-bench.py | 524 ++
 bench/search-bench.sh | 123 +
 bench/searchsorted-bench.py | 340 +
 bench/searchsorted-bench2.py | 340 +
 bench/shelve-bench.py | 199 +
 bench/split-file.py | 40 +
 bench/sqlite-search-bench-rnd.sh | 105 +
 bench/sqlite-search-bench.py | 455 ++
 bench/sqlite-search-bench.sh | 96 +
 bench/sqlite3-search-bench.py | 190 +
 bench/stress-test.py | 401 +
 bench/stress-test2.py | 238 +
 bench/stress-test3.py | 290 +
 bench/table-bench.py | 424 ++
 bench/table-copy.py | 116 +
 bench/undo_redo.py | 234 +
 bench/undo_redo.txt | 103 +
 bench/widetree.py | 127 +
 bench/widetree2.py | 115 +
 bench/woody-pentiumIV.txt | 189 +
 c-blosc/ANNOUNCE.rst | 54 +
 c-blosc/CMakeLists.txt | 358 +
 c-blosc/LICENSES/BITSHUFFLE.txt | 21 +
 c-blosc/LICENSES/BLOSC.txt | 30 +
 c-blosc/LICENSES/FASTLZ.txt | 24 +
 c-blosc/LICENSES/LZ4.txt | 32 +
 c-blosc/LICENSES/SNAPPY.txt | 28 +
 c-blosc/LICENSES/STDINT.txt | 29 +
 c-blosc/LICENSES/ZLIB.txt | 22 +
 c-blosc/README.md | 196 +
 c-blosc/README_HEADER.rst | 65 +
 c-blosc/README_THREADED.rst | 33 +
 c-blosc/RELEASE_NOTES.rst | 965 +++
 c-blosc/RELEASING.rst | 111 +
 c-blosc/THANKS.rst | 35 +
 c-blosc/THOUGHTS_FOR_2.0.txt | 19 +
 c-blosc/appveyor.yml | 34 +
 c-blosc/appveyor/run_with_env.cmd | 88 +
 c-blosc/bench/CMakeLists.txt | 123 +
 c-blosc/bench/Makefile | 40 +
 c-blosc/bench/Makefile.mingw | 45 +
 c-blosc/bench/bench.c | 584 ++
 c-blosc/bench/plot-speeds.py | 223 +
 c-blosc/blosc.pc.in | 14 +
 c-blosc/blosc/CMakeLists.txt | 221 +
 c-blosc/blosc/bitshuffle-avx2.c | 248 +
 c-blosc/blosc/bitshuffle-avx2.h | 38 +
 c-blosc/blosc/bitshuffle-generic.c | 221 +
 c-blosc/blosc/bitshuffle-generic.h | 161 +
 c-blosc/blosc/bitshuffle-sse2.c | 467 ++
 c-blosc/blosc/bitshuffle-sse2.h | 52 +
 c-blosc/blosc/blosc-common.h | 80 +
 c-blosc/blosc/blosc-export.h | 45 +
 c-blosc/blosc/blosc.c | 2186 ++++++
 c-blosc/blosc/blosc.h | 512 ++
 c-blosc/blosc/blosclz.c | 528 ++
 c-blosc/blosc/blosclz.h | 64 +
 c-blosc/blosc/config.h.in | 11 +
 c-blosc/blosc/fastcopy.c | 504 ++
 c-blosc/blosc/fastcopy.h | 19 +
 c-blosc/blosc/shuffle-avx2.c | 757 ++
 c-blosc/blosc/shuffle-avx2.h | 36 +
 c-blosc/blosc/shuffle-generic.c | 25 +
 c-blosc/blosc/shuffle-generic.h | 99 +
 c-blosc/blosc/shuffle-sse2.c | 626 ++
 c-blosc/blosc/shuffle-sse2.h | 36 +
 c-blosc/blosc/shuffle.c | 442 ++
 c-blosc/blosc/shuffle.h | 67 +
 c-blosc/blosc/win32/pthread.c | 218 +
 c-blosc/blosc/win32/pthread.h | 92 +
 c-blosc/blosc/win32/stdint-windows.h | 259 +
 c-blosc/build.py | 10 +
 c-blosc/cmake/FindLZ4.cmake | 10 +
 c-blosc/cmake/FindSnappy.cmake | 10 +
 c-blosc/cmake/FindZstd.cmake | 10 +
 c-blosc/cmake_uninstall.cmake.in | 22 +
 c-blosc/compat/CMakeLists.txt | 35 +
 c-blosc/compat/README.rst | 4 +
 c-blosc/compat/blosc-1.11.1-blosclz.cdata | Bin 0 -> 26736 bytes
 c-blosc/compat/blosc-1.11.1-lz4.cdata | Bin 0 -> 33610 bytes
 c-blosc/compat/blosc-1.11.1-lz4hc.cdata | Bin 0 -> 32485 bytes
 c-blosc/compat/blosc-1.11.1-snappy.cdata | Bin 0 -> 199683 bytes
 c-blosc/compat/blosc-1.11.1-zlib.cdata | Bin 0 -> 16176 bytes
 c-blosc/compat/blosc-1.11.1-zstd.cdata | Bin 0 -> 3569 bytes
 c-blosc/compat/blosc-1.14.0-blosclz.cdata | Bin 0 -> 36256 bytes
 c-blosc/compat/blosc-1.14.0-lz4.cdata | Bin 0 -> 36267 bytes
 c-blosc/compat/blosc-1.14.0-lz4hc.cdata | Bin 0 -> 36263 bytes
 c-blosc/compat/blosc-1.14.0-snappy.cdata | Bin 0 -> 199855 bytes
 c-blosc/compat/blosc-1.14.0-zlib.cdata | Bin 0 -> 20530 bytes
 c-blosc/compat/blosc-1.14.0-zstd.cdata | Bin 0 -> 3569 bytes
 c-blosc/compat/blosc-1.3.0-blosclz.cdata | Bin 0 -> 36349 bytes
 c-blosc/compat/blosc-1.3.0-lz4.cdata | Bin 0 -> 36267 bytes
 c-blosc/compat/blosc-1.3.0-lz4hc.cdata | Bin 0 -> 31963 bytes
 c-blosc/compat/blosc-1.3.0-snappy.cdata | Bin 0 -> 199935 bytes
 c-blosc/compat/blosc-1.3.0-zlib.cdata | Bin 0 -> 14956 bytes
 c-blosc/compat/blosc-1.7.0-blosclz.cdata | Bin 0 -> 36394 bytes
 c-blosc/compat/blosc-1.7.0-lz4.cdata | Bin 0 -> 36267 bytes
 c-blosc/compat/blosc-1.7.0-lz4hc.cdata | Bin 0 -> 31963 bytes
 c-blosc/compat/blosc-1.7.0-snappy.cdata | Bin 0 -> 199935 bytes
 c-blosc/compat/blosc-1.7.0-zlib.cdata | Bin 0 -> 14956 bytes
 c-blosc/compat/filegen.c | 110 +
 c-blosc/conanfile.py | 73 +
 c-blosc/examples/README.rst | 12 +
 c-blosc/examples/many_compressors.c | 117 +
 c-blosc/examples/multithread.c | 100 +
 c-blosc/examples/noinit.c | 87 +
 c-blosc/examples/simple.c | 88 +
 c-blosc/examples/win-dynamic-linking.c | 128 +
 c-blosc/internal-complibs/lz4-1.8.1.2/lz4.c | 1564 ++++
 c-blosc/internal-complibs/lz4-1.8.1.2/lz4.h | 479 ++
 c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.c | 893 +++
 c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.h | 272 +
 .../internal-complibs/lz4-1.8.1.2/lz4opt.h | 356 +
 .../snappy-1.1.1/add-version.patch | 19 +
 .../snappy-1.1.1/msvc1.patch | 17 +
 .../snappy-1.1.1/msvc2.patch | 27 +
 .../snappy-1.1.1/snappy-c.cc | 90 +
 .../internal-complibs/snappy-1.1.1/snappy-c.h | 146 +
 .../snappy-1.1.1/snappy-internal.h | 150 +
 .../snappy-1.1.1/snappy-sinksource.cc | 71 +
 .../snappy-1.1.1/snappy-sinksource.h | 137 +
 .../snappy-1.1.1/snappy-stubs-internal.cc | 42 +
 .../snappy-1.1.1/snappy-stubs-internal.h | 491 ++
 .../snappy-1.1.1/snappy-stubs-public.h | 111 +
 .../internal-complibs/snappy-1.1.1/snappy.cc | 1306 ++++
 .../internal-complibs/snappy-1.1.1/snappy.h | 192 +
 .../internal-complibs/zlib-1.2.8/adler32.c | 179 +
 .../internal-complibs/zlib-1.2.8/compress.c | 80 +
 c-blosc/internal-complibs/zlib-1.2.8/crc32.c | 425 ++
 c-blosc/internal-complibs/zlib-1.2.8/crc32.h | 441 ++
 .../internal-complibs/zlib-1.2.8/deflate.c | 1967 +++++
 .../internal-complibs/zlib-1.2.8/deflate.h | 346 +
 .../internal-complibs/zlib-1.2.8/gzclose.c | 25 +
 c-blosc/internal-complibs/zlib-1.2.8/gzguts.h | 209 +
 c-blosc/internal-complibs/zlib-1.2.8/gzlib.c | 634 ++
 c-blosc/internal-complibs/zlib-1.2.8/gzread.c | 594 ++
 .../internal-complibs/zlib-1.2.8/gzwrite.c | 577 ++
 .../internal-complibs/zlib-1.2.8/infback.c | 640 ++
 .../internal-complibs/zlib-1.2.8/inffast.c | 340 +
 .../internal-complibs/zlib-1.2.8/inffast.h | 11 +
 .../internal-complibs/zlib-1.2.8/inffixed.h | 94 +
 .../internal-complibs/zlib-1.2.8/inflate.c | 1512 ++++
 .../internal-complibs/zlib-1.2.8/inflate.h | 122 +
 .../internal-complibs/zlib-1.2.8/inftrees.c | 306 +
 .../internal-complibs/zlib-1.2.8/inftrees.h | 62 +
 c-blosc/internal-complibs/zlib-1.2.8/trees.c | 1226 ++++
 c-blosc/internal-complibs/zlib-1.2.8/trees.h | 128 +
 .../internal-complibs/zlib-1.2.8/uncompr.c | 59 +
 c-blosc/internal-complibs/zlib-1.2.8/zconf.h | 511 ++
 c-blosc/internal-complibs/zlib-1.2.8/zlib.h | 1768 +++++
 c-blosc/internal-complibs/zlib-1.2.8/zutil.c | 324 +
 c-blosc/internal-complibs/zlib-1.2.8/zutil.h | 253 +
 .../internal-complibs/zstd-1.3.4/.gitignore | 3 +
 c-blosc/internal-complibs/zstd-1.3.4/BUCK | 220 +
 c-blosc/internal-complibs/zstd-1.3.4/Makefile | 189 +
 .../internal-complibs/zstd-1.3.4/README.md | 115 +
 .../zstd-1.3.4/common/bitstream.h | 471 ++
 .../zstd-1.3.4/common/compiler.h | 111 +
 .../internal-complibs/zstd-1.3.4/common/cpu.h | 216 +
 .../zstd-1.3.4/common/entropy_common.c | 221 +
 .../zstd-1.3.4/common/error_private.c | 48 +
 .../zstd-1.3.4/common/error_private.h | 76 +
 .../internal-complibs/zstd-1.3.4/common/fse.h | 704 ++
 .../zstd-1.3.4/common/fse_decompress.c | 309 +
 .../internal-complibs/zstd-1.3.4/common/huf.h | 327 +
 .../internal-complibs/zstd-1.3.4/common/mem.h | 362 +
 .../zstd-1.3.4/common/pool.c | 283 +
 .../zstd-1.3.4/common/pool.h | 74 +
 .../zstd-1.3.4/common/threading.c | 75 +
 .../zstd-1.3.4/common/threading.h | 123 +
 .../zstd-1.3.4/common/xxhash.c | 875 +++
 .../zstd-1.3.4/common/xxhash.h | 305 +
 .../zstd-1.3.4/common/zstd_common.c | 86 +
 .../zstd-1.3.4/common/zstd_errors.h | 92 +
 .../zstd-1.3.4/common/zstd_internal.h | 290 +
 .../zstd-1.3.4/compress/fse_compress.c | 849 +++
 .../zstd-1.3.4/compress/huf_compress.c | 788 ++
 .../zstd-1.3.4/compress/zstd_compress.c | 3449 +++++++++
 .../compress/zstd_compress_internal.h | 709 ++
 .../zstd-1.3.4/compress/zstd_double_fast.c | 327 +
 .../zstd-1.3.4/compress/zstd_double_fast.h | 36 +
 .../zstd-1.3.4/compress/zstd_fast.c | 259 +
 .../zstd-1.3.4/compress/zstd_fast.h | 35 +
 .../zstd-1.3.4/compress/zstd_lazy.c | 824 +++
 .../zstd-1.3.4/compress/zstd_lazy.h | 56 +
 .../zstd-1.3.4/compress/zstd_ldm.c | 653 ++
 .../zstd-1.3.4/compress/zstd_ldm.h | 111 +
 .../zstd-1.3.4/compress/zstd_opt.c | 923 +++
 .../zstd-1.3.4/compress/zstd_opt.h | 42 +
 .../zstd-1.3.4/compress/zstdmt_compress.c | 1831 +++++
 .../zstd-1.3.4/compress/zstdmt_compress.h | 156 +
 .../zstd-1.3.4/decompress/huf_decompress.c | 1096 +++
 .../zstd-1.3.4/decompress/zstd_decompress.c | 3003 ++++++++
 .../zstd-1.3.4/deprecated/zbuff.h | 213 +
 .../zstd-1.3.4/deprecated/zbuff_common.c | 26 +
 .../zstd-1.3.4/deprecated/zbuff_compress.c | 147 +
 .../zstd-1.3.4/deprecated/zbuff_decompress.c | 75 +
 .../zstd-1.3.4/dictBuilder/cover.c | 1048 +++
 .../zstd-1.3.4/dictBuilder/divsufsort.c | 1913 +++++
 .../zstd-1.3.4/dictBuilder/divsufsort.h | 67 +
 .../zstd-1.3.4/dictBuilder/zdict.c | 1108 +++
 .../zstd-1.3.4/dictBuilder/zdict.h | 212 +
 .../zstd-1.3.4/dll/example/Makefile | 47 +
 .../zstd-1.3.4/dll/example/README.md | 69 +
 .../zstd-1.3.4/dll/example/build_package.bat | 19 +
 .../zstd-1.3.4/dll/example/fullbench-dll.sln | 25 +
 .../dll/example/fullbench-dll.vcxproj | 181 +
 .../zstd-1.3.4/dll/libzstd.def | 88 +
 .../zstd-1.3.4/legacy/zstd_legacy.h | 381 +
 .../zstd-1.3.4/legacy/zstd_v01.c | 2127 ++++++
 .../zstd-1.3.4/legacy/zstd_v01.h | 89 +
 .../zstd-1.3.4/legacy/zstd_v02.c | 3483 +++++++++
 .../zstd-1.3.4/legacy/zstd_v02.h | 88 +
 .../zstd-1.3.4/legacy/zstd_v03.c | 3124 ++++++++
 .../zstd-1.3.4/legacy/zstd_v03.h | 88 +
 .../zstd-1.3.4/legacy/zstd_v04.c | 3677 ++++++++++
 .../zstd-1.3.4/legacy/zstd_v04.h | 137 +
 .../zstd-1.3.4/legacy/zstd_v05.c | 4011 ++++++++++
 .../zstd-1.3.4/legacy/zstd_v05.h | 157 +
 .../zstd-1.3.4/legacy/zstd_v06.c | 4124 +++++++++++
 .../zstd-1.3.4/legacy/zstd_v06.h | 167 +
 .../zstd-1.3.4/legacy/zstd_v07.c | 4502 ++++++++++++
 .../zstd-1.3.4/legacy/zstd_v07.h | 182 +
 .../zstd-1.3.4/libzstd.pc.in | 14 +
 c-blosc/internal-complibs/zstd-1.3.4/zstd.h | 1399 ++++
 c-blosc/test_package/CMakeLists.txt | 8 +
 c-blosc/test_package/conanfile.py | 24 +
 c-blosc/test_package/example.cpp | 59 +
 c-blosc/tests/.gitignore | 1 +
 c-blosc/tests/CMakeLists.txt | 119 +
 c-blosc/tests/Makefile | 52 +
 c-blosc/tests/gcc-segfault-issue.c | 80 +
 c-blosc/tests/print_versions.c | 34 +
 c-blosc/tests/test_all.sh | 14 +
 c-blosc/tests/test_api.c | 150 +
 c-blosc/tests/test_common.h | 152 +
 c-blosc/tests/test_compress_roundtrip.c | 134 +
 c-blosc/tests/test_compress_roundtrip.csv | 267 +
 c-blosc/tests/test_compressor.c | 286 +
 c-blosc/tests/test_getitem.c | 130 +
 c-blosc/tests/test_getitem.csv | 400 +
 c-blosc/tests/test_maxout.c | 157 +
 c-blosc/tests/test_noinit.c | 108 +
 c-blosc/tests/test_nolock.c | 111 +
 c-blosc/tests/test_nthreads.c | 123 +
 c-blosc/tests/test_shuffle_roundtrip_avx2.c | 131 +
 c-blosc/tests/test_shuffle_roundtrip_avx2.csv | 400 +
 .../tests/test_shuffle_roundtrip_generic.c | 93 +
 .../tests/test_shuffle_roundtrip_generic.csv | 134 +
 c-blosc/tests/test_shuffle_roundtrip_sse2.c | 138 +
 c-blosc/tests/test_shuffle_roundtrip_sse2.csv | 400 +
 ci/appveyor/install.ps1 | 96 +
 ci/appveyor/missing-headers.ps1 | 53 +
 ci/appveyor/windows_sdk.cmd | 66 +
 contrib/README | 10 +
 contrib/make_hdf.py | 354 +
 contrib/nctoh5.py | 50 +
 cpuinfo.py | 1698 +++++
 doc/Makefile | 193 +
 doc/make.bat | 242 +
 doc/scripts/filenode.py | 57 +
 doc/scripts/pickletrouble.py | 28 +
 doc/scripts/tutorial1.py | 312 +
 doc/source/FAQ.rst | 576 ++
 doc/source/MIGRATING_TO_2.x.rst | 268 +
 doc/source/MIGRATING_TO_3.x.rst | 581 ++
 doc/source/_static/logo-pytables-small.png | Bin 0 -> 16712 bytes
 doc/source/_templates/layout.html | 15 +
 doc/source/conf.py | 264 +
 doc/source/cookbook/custom_data_types.rst | 103 +
 doc/source/cookbook/hints_for_sql_users.rst | 725 ++
 doc/source/cookbook/index.rst | 24 +
 doc/source/cookbook/inmemory_hdf5_files.rst | 140 +
 doc/source/cookbook/no_root_install.rst | 234 +
 doc/source/cookbook/py2exe_howto.rst | 85 +
 .../cookbook/py2exe_howto/pytables_test.py | 42 +
 doc/source/cookbook/simple_table.rst | 134 +
 .../cookbook/tailoring_atexit_hooks.rst | 61 +
 doc/source/cookbook/threading.rst | 274 +
 doc/source/dev_team.rst | 25 +
 doc/source/development.rst | 48 +
 doc/source/downloads.rst | 53 +
 doc/source/images/NumFocusSponsoredStamp.png | Bin 0 -> 13662 bytes
 doc/source/images/favicon.ico | Bin 0 -> 615 bytes
 doc/source/images/pytables-logo-notext.svg | 160 +
 doc/source/images/pytables-logo.svg | 162 +
 doc/source/index.rst | 67 +
 doc/source/irclogs.rst | 8 +
 doc/source/other_material.rst | 101 +
 doc/source/project_pointers.rst | 24 +
 .../release-notes/RELEASE_NOTES_v0.7.1.rst | 35 +
 .../release-notes/RELEASE_NOTES_v0.7.2.rst | 35 +
 .../release-notes/RELEASE_NOTES_v0.8.rst | 151 +
 .../release-notes/RELEASE_NOTES_v0.9.1.rst | 70 +
 .../release-notes/RELEASE_NOTES_v0.9.rst | 120 +
 .../release-notes/RELEASE_NOTES_v1.0.rst | 218 +
 .../release-notes/RELEASE_NOTES_v1.1.1.rst | 58 +
 .../release-notes/RELEASE_NOTES_v1.1.rst | 58 +
 .../release-notes/RELEASE_NOTES_v1.2.1.rst | 56 +
 .../release-notes/RELEASE_NOTES_v1.2.2.rst | 56 +
 .../release-notes/RELEASE_NOTES_v1.2.3.rst | 56 +
 .../release-notes/RELEASE_NOTES_v1.2.rst | 105 +
 .../release-notes/RELEASE_NOTES_v1.3.1.rst | 66 +
 .../release-notes/RELEASE_NOTES_v1.3.2.rst | 66 +
 .../release-notes/RELEASE_NOTES_v1.3.3.rst | 57 +
 .../release-notes/RELEASE_NOTES_v1.3.rst | 66 +
 .../release-notes/RELEASE_NOTES_v1.4.rst | 71 +
 .../RELEASE_NOTES_v2.0.x-pro.rst | 495 ++
 .../release-notes/RELEASE_NOTES_v2.0.x.rst | 461 ++
 .../RELEASE_NOTES_v2.1.x-pro.rst | 114 +
 .../release-notes/RELEASE_NOTES_v2.1.x.rst | 114 +
 .../RELEASE_NOTES_v2.2.x-pro.rst | 426 ++
 .../release-notes/RELEASE_NOTES_v2.2.x.rst | 412 ++
 .../release-notes/RELEASE_NOTES_v2.3.x.rst | 100 +
 .../release-notes/RELEASE_NOTES_v2.4.x.rst | 156 +
 .../release-notes/RELEASE_NOTES_v3.0.x.rst | 303 +
 .../release-notes/RELEASE_NOTES_v3.1.x.rst | 221 +
 .../release-notes/RELEASE_NOTES_v3.2.x.rst | 279 +
 .../release-notes/RELEASE_NOTES_v3.3.x.rst | 33 +
 .../release-notes/RELEASE_NOTES_v3.4.x.rst | 1 +
 doc/source/release_notes.rst | 135 +
 doc/source/usersguide/bibliography.rst | 162 +
 doc/source/usersguide/condition_syntax.rst | 138 +
 doc/source/usersguide/datatypes.rst | 117 +
 doc/source/usersguide/file_format.rst | 351 +
 doc/source/usersguide/filenode.rst | 292 +
 doc/source/usersguide/images/Q7-10m-noidx.png | Bin 0 -> 81873 bytes
 doc/source/usersguide/images/Q7-10m-noidx.svg | 1264 ++++
 .../usersguide/images/Q8-1g-idx-SSD.png | Bin 0 -> 91417 bytes
 .../usersguide/images/Q8-1g-idx-SSD.svg | 1442 ++++
 .../usersguide/images/Q8-1g-idx-compress.png | Bin 0 -> 95434 bytes
 .../usersguide/images/Q8-1g-idx-compress.svg | 1543 ++++
 .../usersguide/images/Q8-1g-idx-optlevels.png | Bin 0 -> 111142 bytes
 .../usersguide/images/Q8-1g-idx-optlevels.svg | 1630 +++++
 .../usersguide/images/Q8-1g-idx-sorted.png | Bin 0 -> 102858 bytes
 .../usersguide/images/Q8-1g-idx-sorted.svg | 5403 ++++++++++++++
 doc/source/usersguide/images/Q8-1g-noidx.png | Bin 0 -> 83169 bytes
 doc/source/usersguide/images/Q8-1g-noidx.svg | 1345 ++++
 .../images/compressed-recordsize-shuffle.png | Bin 0 -> 55690 bytes
 .../images/compressed-recordsize-shuffle.svg | 285 +
 .../images/compressed-recordsize-zlib.png | Bin 0 -> 40779 bytes
 .../images/compressed-recordsize-zlib.svg | 273 +
 .../images/compressed-recordsize.png | Bin 0 -> 42077 bytes
 .../images/compressed-recordsize.svg | 270 +
 .../compressed-select-cache-shuffle-only.svg | 288 +
 .../compressed-select-cache-shuffle.png | Bin 0 -> 58355 bytes
 .../compressed-select-cache-shuffle.svg | 297 +
 .../images/compressed-select-cache-zlib.png | Bin 0 -> 43630 bytes
 .../images/compressed-select-cache-zlib.svg | 291 +
 .../images/compressed-select-cache.png | Bin 0 -> 44305 bytes
 .../images/compressed-select-cache.svg | 288 +
 ...compressed-select-nocache-shuffle-only.png | Bin 0 -> 54301 bytes
 ...compressed-select-nocache-shuffle-only.svg | 282 +
 .../compressed-select-nocache-shuffle.svg | 291 +
 .../images/compressed-select-nocache.png | Bin 0 -> 60076 bytes
 .../images/compressed-select-nocache.svg | 288 +
 .../compressed-writing-shuffle-only.svg | 276 +
 .../images/compressed-writing-shuffle.png | Bin 0 -> 57395 bytes
 .../images/compressed-writing-shuffle.svg | 285 +
 .../images/compressed-writing-zlib.png | Bin 0 -> 41596 bytes
 .../images/compressed-writing-zlib.svg | 279 +
 .../usersguide/images/compressed-writing.png | Bin 0 -> 43090 bytes
 .../usersguide/images/compressed-writing.svg | 276 +
 .../images/create-chunksize-15GB.png | Bin 0 -> 69380 bytes
 .../images/create-chunksize-15GB.svg | 2906 ++++++++
 .../create-index-time-int32-float64.png | Bin 0 -> 61390 bytes
 .../create-index-time-int32-float64.svg | 1707 +++++
 .../images/filesizes-chunksize-15GB.png | Bin 0 -> 57535 bytes
 .../images/filesizes-chunksize-15GB.svg | 2872 ++++++++
 .../usersguide/images/indexes-sizes2.png | Bin 0 -> 61049 bytes
 .../usersguide/images/indexes-sizes2.svg | 1447 ++++
 .../usersguide/images/objecttree-h5.png | Bin 0 -> 84624 bytes
 doc/source/usersguide/images/objecttree.dia | Bin 0 -> 3545 bytes
 doc/source/usersguide/images/objecttree.pdf | Bin 0 -> 22134 bytes
 doc/source/usersguide/images/objecttree.png | Bin 0 -> 236392 bytes
 doc/source/usersguide/images/objecttree.svg | 828 +++
 .../usersguide/images/pytables-front-logo.pdf | Bin 0 -> 76924 bytes
 .../usersguide/images/pytables-front-logo.svg | 222 +
 .../query-time-nhits-cold-cache-float64.svg | 475 ++
 .../query-time-repeated-query-float64.svg | 342 +
 .../images/random-chunksize-15GB.png | Bin 0 -> 62011 bytes
 .../images/random-chunksize-15GB.svg | 2696 +++++++
 .../read-medium-psyco-nopsyco-comparison.png | Bin 0 -> 65349 bytes
 .../read-medium-psyco-nopsyco-comparison.svg | 1925 +++++
 .../usersguide/images/seq-chunksize-15GB.png | Bin 0 -> 70397 bytes
 .../usersguide/images/seq-chunksize-15GB.svg | 2815 +++++++
 .../images/tutorial1-1-tableview.png | Bin 0 -> 89678 bytes
 .../images/tutorial1-2-tableview.png | Bin 0 -> 88101 bytes
 .../usersguide/images/tutorial1-general.png | Bin 0 -> 48720 bytes
 .../usersguide/images/tutorial2-tableview.png | Bin 0 -> 109956 bytes
 .../write-medium-psyco-nopsyco-comparison.png | Bin 0 -> 60237 bytes
 .../write-medium-psyco-nopsyco-comparison.svg | 1906 +++++
 doc/source/usersguide/index.rst | 68 +
 doc/source/usersguide/installation.rst | 522 ++
 doc/source/usersguide/introduction.rst | 319 +
 doc/source/usersguide/libref.rst | 36 +
 .../usersguide/libref/declarative_classes.rst | 280 +
 doc/source/usersguide/libref/expr_class.rst | 37 +
 doc/source/usersguide/libref/file_class.rst | 139 +
 .../usersguide/libref/filenode_classes.rst | 151 +
 .../usersguide/libref/helper_classes.rst | 152 +
 .../usersguide/libref/hierarchy_classes.rst | 242 +
 .../usersguide/libref/homogenous_storage.rst | 125 +
 doc/source/usersguide/libref/link_classes.rst | 73 +
 .../usersguide/libref/structured_storage.rst | 260 +
 doc/source/usersguide/libref/top_level.rst | 36 +
 doc/source/usersguide/optimization.rst | 1140 +++
 doc/source/usersguide/parameter_files.rst | 155 +
 doc/source/usersguide/tutorials.rst | 2354 ++++++
 doc/source/usersguide/usersguide.rst | 116 +
 doc/source/usersguide/utilities.rst | 502 +
 environment.yml | 3 +
 examples/Single_Table-vs-EArray_Table.ipynb | 846 +++
 examples/add-column.py | 75 +
 examples/array1.py | 51 +
 examples/array2.py | 43 +
 examples/array3.py | 47 +
 examples/array4.py | 54 +
 examples/attributes1.py | 33 +
 examples/carray1.py | 20 +
 examples/check_examples.sh | 59 +
 examples/earray1.py | 16 +
 examples/earray2.py | 81 +
 examples/enum.py | 99 +
 examples/filenodes1.py | 54 +
 examples/index.py | 36 +
 examples/inmemory.py | 52 +
 examples/links.py | 51 +
 examples/multiprocess_access_benchmarks.py | 235 +
 examples/multiprocess_access_queues.py | 183 +
 examples/nested-tut.py | 133 +
 examples/nested1.py | 83 +
 examples/objecttree.py | 51 +
 examples/particles.py | 122 +
 examples/read_array_out_arg.py | 61 +
 examples/simple_threading.py | 100 +
 examples/split.py | 38 +
 examples/table-tree.py | 301 +
 examples/table1.py | 73 +
 examples/table2.py | 43 +
 examples/table3.py | 40 +
 examples/threading_monkeypatch.py | 136 +
 examples/tutorial1-1.py | 112 +
 examples/tutorial1-2.py | 279 +
 examples/tutorial2.py | 107 +
 examples/tutorial3-1.py | 49 +
 examples/tutorial3-2.py | 79 +
 examples/undo-redo.py | 142 +
 examples/vlarray1.py | 38 +
 examples/vlarray2.py | 100 +
 examples/vlarray3.py | 28 +
 examples/vlarray4.py | 28 +
 hdf5-blosc/.gitignore | 36 +
 hdf5-blosc/.travis.yml | 22 +
 hdf5-blosc/CMakeLists.txt | 71 +
 hdf5-blosc/LICENSES/BLOSC.txt | 21 +
 hdf5-blosc/LICENSES/BLOSC_HDF5.txt | 21 +
 hdf5-blosc/LICENSES/H5PY.txt | 34 +
 hdf5-blosc/README.rst | 69 +
 hdf5-blosc/src/blosc_filter.c | 271 +
 hdf5-blosc/src/blosc_filter.h | 27 +
 hdf5-blosc/src/blosc_plugin.c | 42 +
 hdf5-blosc/src/blosc_plugin.h | 36 +
 hdf5-blosc/src/example.c | 125 +
 hdf5-blosc/travis-before-install.sh | 16 +
 requirements.txt | 5 +
 setup.cfg | 9 +
 setup.py | 1051 +++
 src/H5ARRAY-opt.c | 304 +
 src/H5ARRAY-opt.h | 36 +
 src/H5ARRAY.c | 891 +++
 src/H5ARRAY.h | 95 +
 src/H5ATTR.c | 622 ++
 src/H5ATTR.h | 86 +
 src/H5TB-opt.c | 698 ++
 src/H5TB-opt.h | 60 +
 src/H5VLARRAY.c | 414 ++
 src/H5VLARRAY.h | 47 +
 src/H5Zbzip2.c | 194 +
 src/H5Zbzip2.h | 7 +
 src/H5Zlzo.c | 305 +
 src/H5Zlzo.h | 7 +
 src/Makefile | 16 +
 src/idx-opt.c | 390 +
 src/idx-opt.h | 46 +
 src/tables.h | 8 +
 src/typeconv.c | 94 +
 src/typeconv.h | 37 +
 src/utils.c | 1014 +++
 src/utils.h | 161 +
 src/version.h.in | 1 +
 subtree-merge-blosc.sh | 77 +
 tables/__init__.py | 223 +
 tables/_comp_bzip2.pyx | 24 +
 tables/_comp_lzo.pyx | 24 +
 tables/array.py | 952 +++
 tables/atom.py | 1228 ++++
 tables/attributeset.py | 719 ++
 tables/carray.py | 300 +
 tables/conditions.py | 466 ++
 tables/definitions.pxd | 571 ++
 tables/description.py | 950 +++
 tables/earray.py | 270 +
 tables/exceptions.py | 390 +
 tables/expression.py | 727 ++
 tables/file.py | 2873 ++++++++
 tables/filters.py | 463 ++
 tables/flavor.py | 431 ++
 tables/group.py | 1296 ++++
 tables/hdf5extension.pxd | 42 +
 tables/hdf5extension.pyx | 2243 ++++++
 tables/idxutils.py | 512 ++
 tables/index.py | 2241 ++++++
 tables/indexes.py | 198 +
 tables/indexesextension.pyx | 1532 ++++
 tables/leaf.py | 787 ++
 tables/link.py | 443 ++
 tables/linkextension.pyx | 278 +
 tables/lrucacheextension.pxd | 84 +
 tables/lrucacheextension.pyx | 641 ++
 tables/misc/__init__.py | 18 +
 tables/misc/enum.py | 462 ++
 tables/misc/proxydict.py | 86 +
 tables/node.py | 939 +++
 tables/nodes/__init__.py | 33 +
 tables/nodes/filenode.py | 890 +++
 tables/nodes/tests/__init__.py | 13 +
 tables/nodes/tests/test_filenode.dat | 52 +
 tables/nodes/tests/test_filenode.py | 1062 +++
 tables/nodes/tests/test_filenode.xbm | 52 +
 tables/nodes/tests/test_filenode_v1.h5 | Bin 0 -> 9062 bytes
 tables/parameters.py | 449 ++
 tables/path.py | 251 +
 tables/registry.py | 99 +
 tables/req_versions.py | 25 +
 tables/scripts/__init__.py | 18 +
 tables/scripts/pt2to3.py | 527 ++
 tables/scripts/ptdump.py | 196 +
 tables/scripts/ptrepack.py | 574 ++
 tables/scripts/pttree.py | 469 ++
 tables/table.py | 3733 ++++++++++
 tables/tableextension.pyx | 1644 +++++
 tables/tests/Table2_1_lzo_nrv2e_shuffle.h5 | Bin 0 -> 19206 bytes
 tables/tests/Tables_lzo1.h5 | Bin 0 -> 23363 bytes
 tables/tests/Tables_lzo1_shuffle.h5 | Bin 0 -> 21097 bytes
 tables/tests/Tables_lzo2.h5 | Bin 0 -> 23398 bytes
 tables/tests/Tables_lzo2_shuffle.h5 | Bin 0 -> 21097 bytes
 tables/tests/__init__.py | 23 +
 tables/tests/array_mdatom.h5 | Bin 0 -> 5150 bytes
 tables/tests/attr-u16.h5 | Bin 0 -> 28782 bytes
 tables/tests/blosc_bigendian.h5 | Bin 0 -> 11974 bytes
 tables/tests/bug-idx.h5 | Bin 0 -> 14649 bytes
 tables/tests/check_leaks.py | 344 +
 tables/tests/common.py | 483 ++
 tables/tests/create-nested-type.c | 123 +
 tables/tests/create_backcompat_indexes.py | 44 +
 tables/tests/elink.h5 | Bin 0 -> 3550 bytes
 tables/tests/elink2.h5 | Bin 0 -> 2238 bytes
 tables/tests/ex-noattr.h5 | Bin 0 -> 12342 bytes
 tables/tests/flavored_vlarrays-format1.6.h5 | Bin 0 -> 12621 bytes
 tables/tests/float.h5 | Bin 0 -> 4742 bytes
 tables/tests/idx-std-1.x.h5 | Bin 0 -> 26662 bytes
 tables/tests/indexes_2_0.h5 | Bin 0 -> 60801 bytes
 tables/tests/indexes_2_1.h5 | Bin 0 -> 147256 bytes
 tables/tests/issue_368.h5 | Bin 0 -> 1232 bytes
 tables/tests/issue_560.h5 | Bin 0 -> 2344 bytes
 tables/tests/matlab_file.mat | Bin 0 -> 1942 bytes
 tables/tests/nested-type-with-gaps.h5 | Bin 0 -> 1830 bytes
 tables/tests/non-chunked-table.h5 | Bin 0 -> 6184 bytes
 tables/tests/oldflavor_numeric.h5 | Bin 0 -> 112296 bytes
 tables/tests/python2.h5 | Bin 0 -> 79658 bytes
 tables/tests/python3.h5 | Bin 0 -> 79658 bytes
 tables/tests/scalar.h5 | Bin 0 -> 8294 bytes
 tables/tests/slink.h5 | Bin 0 -> 5502 bytes
 tables/tests/smpl_SDSextendible.h5 | Bin 0 -> 6246 bytes
 tables/tests/smpl_compound_chunked.h5 | Bin 0 -> 5774 bytes
 tables/tests/smpl_enum.h5 | Bin 0 -> 2094 bytes
 tables/tests/smpl_f64be.h5 | Bin 0 -> 2294 bytes
 tables/tests/smpl_f64le.h5 | Bin 0 -> 2294 bytes
 tables/tests/smpl_i32be.h5 | Bin 0 -> 2174 bytes
 tables/tests/smpl_i32le.h5 | Bin 0 -> 2174 bytes
 tables/tests/smpl_i64be.h5 | Bin 0 -> 2294 bytes
 tables/tests/smpl_i64le.h5 | Bin 0 -> 2294 bytes
 tables/tests/smpl_unsupptype.h5 | Bin 0 -> 11870 bytes
 tables/tests/test_all.py | 143 +
 tables/tests/test_array.py | 2742 +++++++
 tables/tests/test_attributes.py | 1832 +++++
 tables/tests/test_aux.py | 35 +
 tables/tests/test_backcompat.py | 232 +
 tables/tests/test_basics.py | 2559 +++++++
 tables/tests/test_carray.py | 2849 ++++++++
 tables/tests/test_create.py | 2640 +++++++
 tables/tests/test_do_undo.py | 2745 +++++++
 tables/tests/test_earray.py | 2893 ++++++++
 tables/tests/test_enum.py | 686 ++
 tables/tests/test_expression.py | 1587 ++++
 tables/tests/test_garbage.py | 76 +
 tables/tests/test_hdf5compat.py | 394 +
 tables/tests/test_index_backcompat.py | 175 +
 tables/tests/test_indexes.py | 2714 +++++++
 tables/tests/test_indexvalues.py | 3417 +++++++++
 tables/tests/test_links.py | 622 ++
 tables/tests/test_lists.py | 462 ++
 tables/tests/test_nestedtypes.py | 1545 ++++
 tables/tests/test_numpy.py | 1411 ++++
 tables/tests/test_queries.py | 1303 ++++
 tables/tests/test_ref_array1.mat | Bin 0 -> 16192 bytes
 tables/tests/test_ref_array2.mat | Bin 0 -> 4832 bytes
 tables/tests/test_szip.h5 | Bin 0 -> 5594 bytes
 tables/tests/test_tables.py | 6498 +++++++++++++++++
 tables/tests/test_tablesMD.py | 2269 ++++++
 tables/tests/test_timestamps.py | 183 +
 tables/tests/test_timetype.py | 539 ++
 tables/tests/test_tree.py | 1090 +++
 tables/tests/test_types.py | 337 +
 tables/tests/test_utils.py | 98 +
 tables/tests/test_vlarray.py | 4419 +++++++++
 tables/tests/time-table-vlarray-1_x.h5 | Bin 0 -> 3766 bytes
 tables/tests/times-nested-be.h5 | Bin 0 -> 22674 bytes
 tables/tests/vlstr_attr.h5 | Bin 0 -> 5294 bytes
 tables/tests/vlunicode_endian.h5 | Bin 0 -> 82022 bytes
 tables/tests/zerodim-attrs-1.3.h5 | Bin 0 -> 5102 bytes
 tables/tests/zerodim-attrs-1.4.h5 | Bin 0 -> 4366 bytes
 tables/undoredo.py | 184 +
 tables/unimplemented.py | 167 +
 tables/utils.py | 480 ++
 tables/utilsextension.pxd | 24 +
 tables/utilsextension.pyx | 1575 ++++
 tables/vlarray.py | 884 +++
 utils/pt2to3 | 3 +
 utils/ptdump | 3 +
 utils/ptrepack | 3 +
 utils/pttree | 3 +
 694 files changed, 252677 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .readthedocs.yml
 create mode 100644 .travis.yml
 create mode 100644 ANNOUNCE.txt.in
 create mode 100644 CITATION.bib
 create mode 100644 LICENSE.txt
 create mode 100644 LICENSES/BLOSC.txt
 create mode 100644 LICENSES/CLOUD-SPTHEME.txt
 create mode 100644 LICENSES/FASTLZ.txt
 create mode 100644 LICENSES/H5PY.txt
 create mode 100644 LICENSES/HDF5.txt
 create mode 100644 LICENSES/LZ4.txt
 create mode 100644 LICENSES/SNAPPY.txt
 create mode 100644 LICENSES/STDINT.txt
 create mode 100644 LICENSES/WIN32PTHREADS.txt
 create mode 100644 LICENSES/ZLIB.txt
 create mode 100644 LICENSES/ZSTD.TXT
 create mode 100644 MANIFEST.in
 create mode 100644 Makefile
 create mode 100644 README.rst
 create mode 100644 RELEASE_NOTES.txt
 create mode 100644 THANKS
 create mode 100644 VERSION
 create mode 100644 appveyor.yml
 create mode 100644 bench/LRU-experiments.py
 create mode 100644 bench/LRU-experiments2.py
 create mode 100644 bench/LRUcache-node-bench.py
 create mode 100755 bench/bench-postgres-ranges.sh
 create mode 100755 bench/bench-pytables-ranges.sh
 create mode 100755 bench/bench-pytables.sh
 create mode 100644 bench/blosc.py
 create mode 100644 bench/bsddb-table-bench.py
 create mode 100644 bench/cacheout.py
 create mode 100644 bench/chunkshape-bench.py
 create mode 100644 bench/chunkshape-testing.py
 create mode 100644 bench/collations.py
 create mode 100644 bench/copy-bench.py
 create mode 100644 bench/create-large-number-objects.py
 create mode 100644 bench/deep-tree-h5py.py
 create mode 100644 bench/deep-tree.py
 create mode 100644 bench/evaluate.py
 create mode 100644 bench/expression.py
 create mode 100644 bench/get-figures-ranges.py
 create mode 100644 bench/get-figures.py
 create mode 100644 bench/indexed_search.py
 create mode 100644 bench/keysort.py
 create mode 100644 bench/lookup_bench.py
 create mode 100644 bench/open_close-bench.py
 create mode 100644 bench/opteron-stress-test.txt
 create mode 100644 bench/optimal-chunksize.py
 create mode 100644 bench/plot-bar.py
 create mode 100644 bench/plot-comparison-lzo-zlib-ucl.gnuplot
 create mode 100644 bench/plot-comparison-psyco-lzo.gnuplot
 create mode 100644 bench/poly.py
 create mode 100644 bench/postgres-search-bench.py
 create mode 100644 bench/postgres_backend.py
 create mode 100644 bench/pytables-search-bench.py
 create mode 100644 bench/pytables_backend.py
 create mode 100644 bench/recarray2-test.py
 create mode 100644 bench/search-bench-plot.py
 create mode 100755 bench/search-bench-rnd.sh
 create mode 100644 bench/search-bench.py
 create mode 100755 bench/search-bench.sh
 create mode 100644 bench/searchsorted-bench.py
 create mode 100644 bench/searchsorted-bench2.py
 create mode 100644 bench/shelve-bench.py
 create mode 100644 bench/split-file.py
 create mode 100755 bench/sqlite-search-bench-rnd.sh
 create mode 100644 bench/sqlite-search-bench.py
 create mode 100755 bench/sqlite-search-bench.sh
 create mode 100644 bench/sqlite3-search-bench.py
 create mode 100644 bench/stress-test.py
 create mode 100644 bench/stress-test2.py
 create mode 100644 bench/stress-test3.py
 create mode 100644 bench/table-bench.py
 create mode 100644 bench/table-copy.py
 create mode 100644 bench/undo_redo.py
 create mode 100644 bench/undo_redo.txt
 create mode 100644 bench/widetree.py
 create mode 100644 bench/widetree2.py
 create mode 100644 bench/woody-pentiumIV.txt
 create mode 100644 c-blosc/ANNOUNCE.rst
 create mode 100644 c-blosc/CMakeLists.txt
 create mode 100644 c-blosc/LICENSES/BITSHUFFLE.txt
 create mode 100644 c-blosc/LICENSES/BLOSC.txt
 create mode 100644 c-blosc/LICENSES/FASTLZ.txt
 create mode 100644 c-blosc/LICENSES/LZ4.txt
 create mode 100644 c-blosc/LICENSES/SNAPPY.txt
 create mode 100644 c-blosc/LICENSES/STDINT.txt
 create mode 100644 c-blosc/LICENSES/ZLIB.txt
 create mode 100644 c-blosc/README.md
 create mode 100644 c-blosc/README_HEADER.rst
 create mode 100644 c-blosc/README_THREADED.rst
 create mode 100644 c-blosc/RELEASE_NOTES.rst
 create mode 100644 c-blosc/RELEASING.rst
 create mode 100644 c-blosc/THANKS.rst
 create mode 100644 c-blosc/THOUGHTS_FOR_2.0.txt
 create mode 100644 c-blosc/appveyor.yml
 create mode 100644 c-blosc/appveyor/run_with_env.cmd
 create mode 100644 c-blosc/bench/CMakeLists.txt
 create mode 100644 c-blosc/bench/Makefile
 create mode 100644 c-blosc/bench/Makefile.mingw
 create mode 100644 c-blosc/bench/bench.c
 create mode 100644 c-blosc/bench/plot-speeds.py
 create mode 100644 c-blosc/blosc.pc.in
 create mode 100644 c-blosc/blosc/CMakeLists.txt
 create mode 100644 c-blosc/blosc/bitshuffle-avx2.c
 create mode 100644 c-blosc/blosc/bitshuffle-avx2.h
 create mode 100644 c-blosc/blosc/bitshuffle-generic.c
 create mode 100644 c-blosc/blosc/bitshuffle-generic.h
 create mode 100644 c-blosc/blosc/bitshuffle-sse2.c
 create mode 100644 c-blosc/blosc/bitshuffle-sse2.h
 create mode 100644 c-blosc/blosc/blosc-common.h
 create mode 100644 c-blosc/blosc/blosc-export.h
 create mode 100644 c-blosc/blosc/blosc.c
 create mode 100644 c-blosc/blosc/blosc.h
 create mode 100644 c-blosc/blosc/blosclz.c
 create mode 100644 c-blosc/blosc/blosclz.h
 create mode 100644 c-blosc/blosc/config.h.in
 create mode 100644 c-blosc/blosc/fastcopy.c
 create mode 100644 c-blosc/blosc/fastcopy.h
 create mode 100644 c-blosc/blosc/shuffle-avx2.c
 create mode 100644 c-blosc/blosc/shuffle-avx2.h
 create mode 100644 c-blosc/blosc/shuffle-generic.c
 create mode 100644 c-blosc/blosc/shuffle-generic.h
 create mode 100644 c-blosc/blosc/shuffle-sse2.c
 create mode 100644 c-blosc/blosc/shuffle-sse2.h
 create mode 100644 c-blosc/blosc/shuffle.c
 create mode 100644 c-blosc/blosc/shuffle.h
 create mode 100644 c-blosc/blosc/win32/pthread.c
 create mode 100644 c-blosc/blosc/win32/pthread.h
 create mode 100644 c-blosc/blosc/win32/stdint-windows.h
 create mode 100644 c-blosc/build.py
 create mode 100644 c-blosc/cmake/FindLZ4.cmake
 create mode 100644 c-blosc/cmake/FindSnappy.cmake
 create mode 100644 c-blosc/cmake/FindZstd.cmake
 create mode 100644 c-blosc/cmake_uninstall.cmake.in
 create mode 100644 c-blosc/compat/CMakeLists.txt
 create mode 100644 c-blosc/compat/README.rst
 create mode 100644 c-blosc/compat/blosc-1.11.1-blosclz.cdata
 create mode 100644 c-blosc/compat/blosc-1.11.1-lz4.cdata
 create mode 100644 c-blosc/compat/blosc-1.11.1-lz4hc.cdata
 create mode 100644 c-blosc/compat/blosc-1.11.1-snappy.cdata
 create mode 100644 c-blosc/compat/blosc-1.11.1-zlib.cdata
 create mode 100644 c-blosc/compat/blosc-1.11.1-zstd.cdata
 create mode 100644 c-blosc/compat/blosc-1.14.0-blosclz.cdata
 create mode 100644 c-blosc/compat/blosc-1.14.0-lz4.cdata
 create mode 100644 c-blosc/compat/blosc-1.14.0-lz4hc.cdata
 create mode 100644 c-blosc/compat/blosc-1.14.0-snappy.cdata
 create mode 100644 c-blosc/compat/blosc-1.14.0-zlib.cdata
 create mode 100644 c-blosc/compat/blosc-1.14.0-zstd.cdata
 create mode 100644 c-blosc/compat/blosc-1.3.0-blosclz.cdata
 create mode 100644 c-blosc/compat/blosc-1.3.0-lz4.cdata
 create mode 100644 c-blosc/compat/blosc-1.3.0-lz4hc.cdata
 create mode 100644 c-blosc/compat/blosc-1.3.0-snappy.cdata
 create mode 100644 c-blosc/compat/blosc-1.3.0-zlib.cdata
 create mode 100644 c-blosc/compat/blosc-1.7.0-blosclz.cdata
 create mode 100644 c-blosc/compat/blosc-1.7.0-lz4.cdata
 create mode 100644 c-blosc/compat/blosc-1.7.0-lz4hc.cdata
 create mode 100644 c-blosc/compat/blosc-1.7.0-snappy.cdata
 create mode 100644 c-blosc/compat/blosc-1.7.0-zlib.cdata
 create mode 100644 c-blosc/compat/filegen.c
 create mode 100644 c-blosc/conanfile.py
 create mode 100644 c-blosc/examples/README.rst
 create mode 100644 c-blosc/examples/many_compressors.c
 create mode 100644 c-blosc/examples/multithread.c
 create mode 100644 c-blosc/examples/noinit.c
 create mode 100644 c-blosc/examples/simple.c
 create mode 100644 c-blosc/examples/win-dynamic-linking.c
 create mode 100644 c-blosc/internal-complibs/lz4-1.8.1.2/lz4.c
 create mode 100644 c-blosc/internal-complibs/lz4-1.8.1.2/lz4.h
 create mode 100644 c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.c
 create mode 100644 c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.h
 create mode 100644 c-blosc/internal-complibs/lz4-1.8.1.2/lz4opt.h
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/add-version.patch
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/msvc1.patch
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/msvc2.patch
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-c.cc
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-c.h
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-internal.h
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.cc
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.h
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.cc
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.h
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-public.h
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy.cc
 create mode 100644 c-blosc/internal-complibs/snappy-1.1.1/snappy.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/adler32.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/compress.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/crc32.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/crc32.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/deflate.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/deflate.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/gzclose.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/gzguts.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/gzlib.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/gzread.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/gzwrite.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/infback.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/inffast.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/inffast.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/inffixed.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/inflate.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/inflate.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/inftrees.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/inftrees.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/trees.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/trees.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/uncompr.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/zconf.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/zlib.h
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/zutil.c
 create mode 100644 c-blosc/internal-complibs/zlib-1.2.8/zutil.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/.gitignore
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/BUCK
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/Makefile
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/README.md
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/bitstream.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/compiler.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/cpu.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/entropy_common.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/error_private.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/error_private.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/fse.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/fse_decompress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/huf.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/mem.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/pool.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/pool.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/threading.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/threading.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/zstd_common.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/zstd_errors.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/common/zstd_internal.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/fse_compress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/huf_compress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress_internal.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/decompress/huf_decompress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/decompress/zstd_decompress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_common.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_compress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_decompress.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/cover.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dll/example/Makefile
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dll/example/README.md
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dll/example/build_package.bat
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.sln
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.vcxproj
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/dll/libzstd.def
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_legacy.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v02.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v02.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v03.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v03.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v04.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v04.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v05.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v05.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v06.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v06.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v07.c
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v07.h
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/libzstd.pc.in
 create mode 100644 c-blosc/internal-complibs/zstd-1.3.4/zstd.h
 create mode 100644 c-blosc/test_package/CMakeLists.txt
 create mode 100644 c-blosc/test_package/conanfile.py
 create mode 100644 c-blosc/test_package/example.cpp
 create mode 100644 c-blosc/tests/.gitignore
 create mode 100644 c-blosc/tests/CMakeLists.txt
 create mode 100644 c-blosc/tests/Makefile
 create mode 100644 c-blosc/tests/gcc-segfault-issue.c
 create mode 100644 c-blosc/tests/print_versions.c
 create mode 100644 c-blosc/tests/test_all.sh
 create mode 100644 c-blosc/tests/test_api.c
 create mode 100644 c-blosc/tests/test_common.h
 create mode 100644 c-blosc/tests/test_compress_roundtrip.c
 create mode 100644 c-blosc/tests/test_compress_roundtrip.csv
 create mode 100644 c-blosc/tests/test_compressor.c
 create mode 100644 c-blosc/tests/test_getitem.c
 create mode 100644 c-blosc/tests/test_getitem.csv
 create mode 100644 c-blosc/tests/test_maxout.c
 create mode 100644 c-blosc/tests/test_noinit.c
 create mode 100644 c-blosc/tests/test_nolock.c
 create mode 100644 c-blosc/tests/test_nthreads.c
 create mode 100644 c-blosc/tests/test_shuffle_roundtrip_avx2.c
 create mode 100644 c-blosc/tests/test_shuffle_roundtrip_avx2.csv
 create mode 100644 c-blosc/tests/test_shuffle_roundtrip_generic.c
 create mode 100644 c-blosc/tests/test_shuffle_roundtrip_generic.csv
 create mode 100644 c-blosc/tests/test_shuffle_roundtrip_sse2.c
 create mode 100644 c-blosc/tests/test_shuffle_roundtrip_sse2.csv
 create mode 100644 ci/appveyor/install.ps1
 create mode 100644 ci/appveyor/missing-headers.ps1
 create mode 100644 ci/appveyor/windows_sdk.cmd
 create mode 100644 contrib/README
 create mode 100644 contrib/make_hdf.py
 create mode 100755 contrib/nctoh5.py
 create mode 100644 cpuinfo.py
 create mode 100644 doc/Makefile
 create mode 100644 doc/make.bat
 create mode 100644 doc/scripts/filenode.py
 create mode 100644 doc/scripts/pickletrouble.py
 create mode 100644 doc/scripts/tutorial1.py
 create mode 100644 doc/source/FAQ.rst
 create mode 100644 doc/source/MIGRATING_TO_2.x.rst
 create mode 100644 doc/source/MIGRATING_TO_3.x.rst
 create mode 100644 doc/source/_static/logo-pytables-small.png
 create mode 100644 doc/source/_templates/layout.html
 create mode 100644 doc/source/conf.py
 create mode 100644 doc/source/cookbook/custom_data_types.rst
 create mode 100644 doc/source/cookbook/hints_for_sql_users.rst
 create mode 100644 doc/source/cookbook/index.rst
 create mode 100644 doc/source/cookbook/inmemory_hdf5_files.rst
 create mode 100644 doc/source/cookbook/no_root_install.rst
 create mode 100644 doc/source/cookbook/py2exe_howto.rst
 create mode 100644 doc/source/cookbook/py2exe_howto/pytables_test.py
 create mode 100644 doc/source/cookbook/simple_table.rst
 create mode 100644 doc/source/cookbook/tailoring_atexit_hooks.rst
 create mode 100644 doc/source/cookbook/threading.rst
 create mode 100644 doc/source/dev_team.rst
 create mode 100644 doc/source/development.rst
 create mode 100644 doc/source/downloads.rst
 create mode 100644 doc/source/images/NumFocusSponsoredStamp.png
 create mode 100644 doc/source/images/favicon.ico
 create mode 100644 doc/source/images/pytables-logo-notext.svg
 create mode 100644 doc/source/images/pytables-logo.svg
 create mode 100644 doc/source/index.rst
 create mode 100644 doc/source/irclogs.rst
 create mode 100644 doc/source/other_material.rst
 create mode 100644 doc/source/project_pointers.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.7.1.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.7.2.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.8.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.9.1.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.9.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.0.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.1.1.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.1.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.1.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.2.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.3.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.3.1.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.3.2.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.3.3.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.3.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.4.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.0.x-pro.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.0.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.1.x-pro.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.1.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.2.x-pro.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.2.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.3.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.4.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.0.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.1.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.2.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.3.x.rst
 create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.4.x.rst
 create mode 100644 doc/source/release_notes.rst
 create mode 100644 doc/source/usersguide/bibliography.rst
 create mode 100644 doc/source/usersguide/condition_syntax.rst
 create mode 100644 doc/source/usersguide/datatypes.rst
 create mode 100644 doc/source/usersguide/file_format.rst
 create mode 100644 doc/source/usersguide/filenode.rst
 create mode 100644 doc/source/usersguide/images/Q7-10m-noidx.png
 create mode 100644 doc/source/usersguide/images/Q7-10m-noidx.svg
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-SSD.png
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-SSD.svg
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-compress.png
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-compress.svg
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-optlevels.png
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-optlevels.svg
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-sorted.png
 create mode 100644 doc/source/usersguide/images/Q8-1g-idx-sorted.svg
 create mode 100644 doc/source/usersguide/images/Q8-1g-noidx.png
 create mode 100644 doc/source/usersguide/images/Q8-1g-noidx.svg
 create mode 100644 doc/source/usersguide/images/compressed-recordsize-shuffle.png
 create mode 100644 doc/source/usersguide/images/compressed-recordsize-shuffle.svg
 create mode 100644 doc/source/usersguide/images/compressed-recordsize-zlib.png
 create mode 100644 doc/source/usersguide/images/compressed-recordsize-zlib.svg
 create mode 100644 doc/source/usersguide/images/compressed-recordsize.png
 create mode 100644 doc/source/usersguide/images/compressed-recordsize.svg
 create mode 100644 doc/source/usersguide/images/compressed-select-cache-shuffle-only.svg
 create mode 100644 doc/source/usersguide/images/compressed-select-cache-shuffle.png
 create mode 100644 doc/source/usersguide/images/compressed-select-cache-shuffle.svg
 create mode 100644 doc/source/usersguide/images/compressed-select-cache-zlib.png
 create mode 100644 doc/source/usersguide/images/compressed-select-cache-zlib.svg
 create mode 100644 doc/source/usersguide/images/compressed-select-cache.png
 create mode 100644 doc/source/usersguide/images/compressed-select-cache.svg
 create mode 100644 doc/source/usersguide/images/compressed-select-nocache-shuffle-only.png
 create mode 100644 doc/source/usersguide/images/compressed-select-nocache-shuffle-only.svg
 create mode 100644 doc/source/usersguide/images/compressed-select-nocache-shuffle.svg
 create mode 100644 doc/source/usersguide/images/compressed-select-nocache.png
 create mode 100644 doc/source/usersguide/images/compressed-select-nocache.svg
 create mode 100644 doc/source/usersguide/images/compressed-writing-shuffle-only.svg
 create mode 100644 doc/source/usersguide/images/compressed-writing-shuffle.png
 create mode 100644 doc/source/usersguide/images/compressed-writing-shuffle.svg
 create mode 100644 doc/source/usersguide/images/compressed-writing-zlib.png
 create mode 100644 doc/source/usersguide/images/compressed-writing-zlib.svg
 create mode 100644 doc/source/usersguide/images/compressed-writing.png
 create mode 100644 doc/source/usersguide/images/compressed-writing.svg
 create mode 100644 doc/source/usersguide/images/create-chunksize-15GB.png
 create mode 100644 doc/source/usersguide/images/create-chunksize-15GB.svg
 create mode 100644 doc/source/usersguide/images/create-index-time-int32-float64.png
 create mode 100644 doc/source/usersguide/images/create-index-time-int32-float64.svg
 create mode 100644 doc/source/usersguide/images/filesizes-chunksize-15GB.png
 create mode 100644 doc/source/usersguide/images/filesizes-chunksize-15GB.svg
 create mode 100644 doc/source/usersguide/images/indexes-sizes2.png
 create mode 100644 doc/source/usersguide/images/indexes-sizes2.svg
 create mode 100644 doc/source/usersguide/images/objecttree-h5.png
 create mode 100644 doc/source/usersguide/images/objecttree.dia
 create mode 100644 doc/source/usersguide/images/objecttree.pdf
 create mode 100644 doc/source/usersguide/images/objecttree.png
 create mode 100644 doc/source/usersguide/images/objecttree.svg
 create mode 100644 doc/source/usersguide/images/pytables-front-logo.pdf
 create mode 100644 doc/source/usersguide/images/pytables-front-logo.svg
 create mode 100644 doc/source/usersguide/images/query-time-nhits-cold-cache-float64.svg
 create mode 100644 doc/source/usersguide/images/query-time-repeated-query-float64.svg
 create mode 100644 doc/source/usersguide/images/random-chunksize-15GB.png
 create mode 100644 doc/source/usersguide/images/random-chunksize-15GB.svg
 create mode 100644 doc/source/usersguide/images/read-medium-psyco-nopsyco-comparison.png
 create mode 100644 doc/source/usersguide/images/read-medium-psyco-nopsyco-comparison.svg
 create mode 100644 doc/source/usersguide/images/seq-chunksize-15GB.png
 create mode 100644 doc/source/usersguide/images/seq-chunksize-15GB.svg
 create mode 100644 doc/source/usersguide/images/tutorial1-1-tableview.png
 create mode 100644 doc/source/usersguide/images/tutorial1-2-tableview.png
 create mode 100644 doc/source/usersguide/images/tutorial1-general.png
 create mode 100644 doc/source/usersguide/images/tutorial2-tableview.png
 create mode 100644 doc/source/usersguide/images/write-medium-psyco-nopsyco-comparison.png
 create mode 100644 doc/source/usersguide/images/write-medium-psyco-nopsyco-comparison.svg
 create mode 100644 doc/source/usersguide/index.rst
 create mode 100644 doc/source/usersguide/installation.rst
 create mode 100644 doc/source/usersguide/introduction.rst
 create mode 100644 doc/source/usersguide/libref.rst
 create mode 100644 doc/source/usersguide/libref/declarative_classes.rst
 create mode 100644 doc/source/usersguide/libref/expr_class.rst
 create mode 100644 doc/source/usersguide/libref/file_class.rst
 create mode 100644 doc/source/usersguide/libref/filenode_classes.rst
 create mode 100644 doc/source/usersguide/libref/helper_classes.rst
 create mode 100644 doc/source/usersguide/libref/hierarchy_classes.rst
 create mode 100644 doc/source/usersguide/libref/homogenous_storage.rst
 create mode 100644 doc/source/usersguide/libref/link_classes.rst
 create mode 100644 doc/source/usersguide/libref/structured_storage.rst
 create mode 100644 doc/source/usersguide/libref/top_level.rst
 create mode 100644 doc/source/usersguide/optimization.rst
 create mode 100644 doc/source/usersguide/parameter_files.rst
 create mode 100644 doc/source/usersguide/tutorials.rst
 create mode 100644 doc/source/usersguide/usersguide.rst
 create mode 100644 doc/source/usersguide/utilities.rst
 create mode 100644 environment.yml
 create mode 100644 examples/Single_Table-vs-EArray_Table.ipynb
 create mode 100644 examples/add-column.py
 create mode 100644 examples/array1.py
 create mode 100644 examples/array2.py
 create mode 100644 examples/array3.py
 create mode 100644 examples/array4.py
 create mode 100644 examples/attributes1.py
 create mode 100644 examples/carray1.py
 create mode 100755 examples/check_examples.sh
 create mode 100644 examples/earray1.py
 create mode 100644 examples/earray2.py
 create mode 100644 examples/enum.py
 create mode 100644 examples/filenodes1.py
 create mode 100644 examples/index.py
 create mode 100644 examples/inmemory.py
 create mode 100644 examples/links.py
 create mode 100644 examples/multiprocess_access_benchmarks.py
 create mode 100644 examples/multiprocess_access_queues.py
 create mode 100644 examples/nested-tut.py
 create mode 100644 examples/nested1.py
 create mode 100644 examples/objecttree.py
 create mode 100644 examples/particles.py
 create mode 100644 examples/read_array_out_arg.py
 create mode 100644 examples/simple_threading.py
 create mode 100644 examples/split.py
 create mode 100644 examples/table-tree.py
 create mode 100644 examples/table1.py
 create mode 100644 examples/table2.py
 create mode 100644 examples/table3.py
 create mode 100644 examples/threading_monkeypatch.py
 create mode 100644 examples/tutorial1-1.py
 create mode 100644 examples/tutorial1-2.py
 create mode 100644 examples/tutorial2.py
 create mode 100644 examples/tutorial3-1.py
 create mode 100644 examples/tutorial3-2.py
 create mode 100644 examples/undo-redo.py
 create mode 100644 examples/vlarray1.py
 create mode 100644 examples/vlarray2.py
 create mode 100644 examples/vlarray3.py
 create mode 100644 examples/vlarray4.py
 create mode 100644 hdf5-blosc/.gitignore
 create mode 100644 hdf5-blosc/.travis.yml
 create mode 100644 hdf5-blosc/CMakeLists.txt
 create mode 100644 hdf5-blosc/LICENSES/BLOSC.txt
 create mode 100644 hdf5-blosc/LICENSES/BLOSC_HDF5.txt
 create mode 100644 hdf5-blosc/LICENSES/H5PY.txt
 create mode 100644 hdf5-blosc/README.rst
 create mode 100644 hdf5-blosc/src/blosc_filter.c
 create mode 100644 hdf5-blosc/src/blosc_filter.h
 create mode 100644 hdf5-blosc/src/blosc_plugin.c
 create mode 100644 hdf5-blosc/src/blosc_plugin.h
 create mode 100644 hdf5-blosc/src/example.c
 create mode 100755 hdf5-blosc/travis-before-install.sh
 create mode 100644 requirements.txt
 create mode 100644 setup.cfg
 create mode 100755 setup.py
 create mode 100644 src/H5ARRAY-opt.c
 create mode 100644 src/H5ARRAY-opt.h
 create mode 100644 src/H5ARRAY.c
 create mode 100644 src/H5ARRAY.h
 create mode 100644 src/H5ATTR.c
 create mode 100644 src/H5ATTR.h
 create mode 100644 src/H5TB-opt.c
 create mode 100644 src/H5TB-opt.h
 create mode 100644 src/H5VLARRAY.c
 create mode 100644 src/H5VLARRAY.h
 create mode 100644 src/H5Zbzip2.c
 create mode 100644 src/H5Zbzip2.h
 create mode 100644 src/H5Zlzo.c
create mode 100644 src/H5Zlzo.h create mode 100644 src/Makefile create mode 100644 src/idx-opt.c create mode 100644 src/idx-opt.h create mode 100644 src/tables.h create mode 100644 src/typeconv.c create mode 100644 src/typeconv.h create mode 100644 src/utils.c create mode 100644 src/utils.h create mode 100644 src/version.h.in create mode 100755 subtree-merge-blosc.sh create mode 100644 tables/__init__.py create mode 100644 tables/_comp_bzip2.pyx create mode 100644 tables/_comp_lzo.pyx create mode 100644 tables/array.py create mode 100644 tables/atom.py create mode 100644 tables/attributeset.py create mode 100644 tables/carray.py create mode 100644 tables/conditions.py create mode 100644 tables/definitions.pxd create mode 100644 tables/description.py create mode 100644 tables/earray.py create mode 100644 tables/exceptions.py create mode 100644 tables/expression.py create mode 100644 tables/file.py create mode 100644 tables/filters.py create mode 100644 tables/flavor.py create mode 100644 tables/group.py create mode 100644 tables/hdf5extension.pxd create mode 100644 tables/hdf5extension.pyx create mode 100644 tables/idxutils.py create mode 100644 tables/index.py create mode 100644 tables/indexes.py create mode 100644 tables/indexesextension.pyx create mode 100644 tables/leaf.py create mode 100644 tables/link.py create mode 100644 tables/linkextension.pyx create mode 100644 tables/lrucacheextension.pxd create mode 100644 tables/lrucacheextension.pyx create mode 100644 tables/misc/__init__.py create mode 100644 tables/misc/enum.py create mode 100644 tables/misc/proxydict.py create mode 100644 tables/node.py create mode 100644 tables/nodes/__init__.py create mode 100644 tables/nodes/filenode.py create mode 100644 tables/nodes/tests/__init__.py create mode 100644 tables/nodes/tests/test_filenode.dat create mode 100644 tables/nodes/tests/test_filenode.py create mode 100644 tables/nodes/tests/test_filenode.xbm create mode 100644 tables/nodes/tests/test_filenode_v1.h5 create mode 100644 tables/parameters.py create mode 100644 tables/path.py create mode 100644 tables/registry.py create mode 100644 tables/req_versions.py create mode 100644 tables/scripts/__init__.py create mode 100644 tables/scripts/pt2to3.py create mode 100644 tables/scripts/ptdump.py create mode 100644 tables/scripts/ptrepack.py create mode 100644 tables/scripts/pttree.py create mode 100644 tables/table.py create mode 100644 tables/tableextension.pyx create mode 100644 tables/tests/Table2_1_lzo_nrv2e_shuffle.h5 create mode 100644 tables/tests/Tables_lzo1.h5 create mode 100644 tables/tests/Tables_lzo1_shuffle.h5 create mode 100644 tables/tests/Tables_lzo2.h5 create mode 100644 tables/tests/Tables_lzo2_shuffle.h5 create mode 100644 tables/tests/__init__.py create mode 100644 tables/tests/array_mdatom.h5 create mode 100644 tables/tests/attr-u16.h5 create mode 100644 tables/tests/blosc_bigendian.h5 create mode 100644 tables/tests/bug-idx.h5 create mode 100644 tables/tests/check_leaks.py create mode 100644 tables/tests/common.py create mode 100644 tables/tests/create-nested-type.c create mode 100644 tables/tests/create_backcompat_indexes.py create mode 100644 tables/tests/elink.h5 create mode 100644 tables/tests/elink2.h5 create mode 100644 tables/tests/ex-noattr.h5 create mode 100644 tables/tests/flavored_vlarrays-format1.6.h5 create mode 100644 tables/tests/float.h5 create mode 100644 tables/tests/idx-std-1.x.h5 create mode 100644 tables/tests/indexes_2_0.h5 create mode 100644 tables/tests/indexes_2_1.h5 create mode 100644 
tables/tests/issue_368.h5 create mode 100644 tables/tests/issue_560.h5 create mode 100644 tables/tests/matlab_file.mat create mode 100644 tables/tests/nested-type-with-gaps.h5 create mode 100644 tables/tests/non-chunked-table.h5 create mode 100644 tables/tests/oldflavor_numeric.h5 create mode 100644 tables/tests/python2.h5 create mode 100644 tables/tests/python3.h5 create mode 100644 tables/tests/scalar.h5 create mode 100644 tables/tests/slink.h5 create mode 100644 tables/tests/smpl_SDSextendible.h5 create mode 100644 tables/tests/smpl_compound_chunked.h5 create mode 100644 tables/tests/smpl_enum.h5 create mode 100644 tables/tests/smpl_f64be.h5 create mode 100644 tables/tests/smpl_f64le.h5 create mode 100644 tables/tests/smpl_i32be.h5 create mode 100644 tables/tests/smpl_i32le.h5 create mode 100644 tables/tests/smpl_i64be.h5 create mode 100644 tables/tests/smpl_i64le.h5 create mode 100644 tables/tests/smpl_unsupptype.h5 create mode 100644 tables/tests/test_all.py create mode 100644 tables/tests/test_array.py create mode 100644 tables/tests/test_attributes.py create mode 100644 tables/tests/test_aux.py create mode 100644 tables/tests/test_backcompat.py create mode 100644 tables/tests/test_basics.py create mode 100644 tables/tests/test_carray.py create mode 100644 tables/tests/test_create.py create mode 100644 tables/tests/test_do_undo.py create mode 100644 tables/tests/test_earray.py create mode 100644 tables/tests/test_enum.py create mode 100644 tables/tests/test_expression.py create mode 100644 tables/tests/test_garbage.py create mode 100644 tables/tests/test_hdf5compat.py create mode 100644 tables/tests/test_index_backcompat.py create mode 100644 tables/tests/test_indexes.py create mode 100644 tables/tests/test_indexvalues.py create mode 100644 tables/tests/test_links.py create mode 100644 tables/tests/test_lists.py create mode 100644 tables/tests/test_nestedtypes.py create mode 100644 tables/tests/test_numpy.py create mode 100644 tables/tests/test_queries.py create mode 100644 tables/tests/test_ref_array1.mat create mode 100644 tables/tests/test_ref_array2.mat create mode 100644 tables/tests/test_szip.h5 create mode 100644 tables/tests/test_tables.py create mode 100644 tables/tests/test_tablesMD.py create mode 100644 tables/tests/test_timestamps.py create mode 100644 tables/tests/test_timetype.py create mode 100644 tables/tests/test_tree.py create mode 100644 tables/tests/test_types.py create mode 100644 tables/tests/test_utils.py create mode 100644 tables/tests/test_vlarray.py create mode 100644 tables/tests/time-table-vlarray-1_x.h5 create mode 100644 tables/tests/times-nested-be.h5 create mode 100644 tables/tests/vlstr_attr.h5 create mode 100644 tables/tests/vlunicode_endian.h5 create mode 100644 tables/tests/zerodim-attrs-1.3.h5 create mode 100644 tables/tests/zerodim-attrs-1.4.h5 create mode 100644 tables/undoredo.py create mode 100644 tables/unimplemented.py create mode 100644 tables/utils.py create mode 100644 tables/utilsextension.pxd create mode 100644 tables/utilsextension.pyx create mode 100644 tables/vlarray.py create mode 100755 utils/pt2to3 create mode 100755 utils/ptdump create mode 100755 utils/ptrepack create mode 100755 utils/pttree diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2889735 --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +# Dirs +build/ +doc/build/ +.ropeproject/ +.idea + +# File types +*.pyc +tables/*.c +tables/*.so + +# specific files +src/version.h + +# Build artifacts +a.out +tmp/ diff --git a/.readthedocs.yml 
b/.readthedocs.yml
new file mode 100644
index 0000000..3f256ce
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,7 @@
+conda:
+  file: environment.yml
+python:
+  version: 3
+  setup_py_install: true
+  extra_requirements:
+    - docs
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..da7c6c3
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,36 @@
+language: python
+
+python:
+  - 2.7
+  - 3.4
+  - 3.5
+  - 3.6
+
+sudo: false
+
+addons:
+  apt_packages:
+    - libbz2-dev
+    - libhdf5-serial-dev
+    - liblzo2-dev
+
+before_install:
+  - pip install cython
+  - python setup.py sdist
+
+install:
+  - pip install dist/*.tar.gz
+  - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi
+
+script:
+  - cd .. && python -m tables.tests.test_all
+  - pt2to3 -h
+  - ptrepack -h
+  - ptdump -h
+  - pttree -h
+
+notifications:
+  webhooks:
+    urls:
+      - https://webhooks.gitter.im/e/58f8a9014ef02f6217ec
+    on_success: change
diff --git a/ANNOUNCE.txt.in b/ANNOUNCE.txt.in
new file mode 100644
index 0000000..fc56b7e
--- /dev/null
+++ b/ANNOUNCE.txt.in
@@ -0,0 +1,72 @@
+================================
+ Announcing PyTables @VERSION@
+================================
+
+We are happy to announce PyTables @VERSION@.
+
+
+What's new
+==========
+
+#XXX version-specific blurb XXX#
+
+In case you want to know in more detail what has changed in this
+version, please refer to: http://www.pytables.org/release_notes.html
+
+You can install it via pip or download a source package with generated
+PDF and HTML docs from:
+https://github.com/PyTables/PyTables/releases/v@VERSION@
+
+For an online version of the manual, visit:
+http://www.pytables.org/usersguide/index.html
+
+
+What is it?
+===========
+
+PyTables is a library for managing hierarchical datasets, designed to
+efficiently cope with extremely large amounts of data, with support
+for full 64-bit file addressing.  PyTables runs on top of the HDF5
+library and the NumPy package to achieve maximum throughput and
+convenient use.  PyTables includes OPSI, a new indexing technology
+that allows data lookups in tables exceeding 10 gigarows (10**10
+rows) to be performed in less than a tenth of a second.
+
+
+Resources
+=========
+
+About PyTables: http://www.pytables.org
+
+About the HDF5 library: http://hdfgroup.org/HDF5/
+
+About NumPy: http://numpy.scipy.org/
+
+
+Acknowledgments
+===============
+
+Thanks to the many users who provided feature improvements, patches,
+bug reports, support and suggestions.  See the ``THANKS`` file in the
+distribution package for an (incomplete) list of contributors.  Most
+especially, a lot of kudos go to the HDF5 and NumPy makers.  Without
+them, PyTables simply would not exist.
+
+
+Share your experience
+=====================
+
+Let us know of any bugs, suggestions, gripes, kudos, etc. you may have.
+
+
+----
+
+  **Enjoy data!**
+
+  -- The PyTables Developers
+
+.. Local Variables:
+.. mode: rst
+.. coding: utf-8
+.. fill-column: 72
+.. End:
diff --git a/CITATION.bib b/CITATION.bib
new file mode 100644
index 0000000..feb85cf
--- /dev/null
+++ b/CITATION.bib
@@ -0,0 +1,6 @@
+@Misc{,
+  author = {PyTables Developers Team},
+  title = {{PyTables}: Hierarchical Datasets in {Python}},
+  year = {2002--},
+  url = "https://www.pytables.org/"
+}
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..21ffd7e
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,35 @@
+Copyright Notice and Statement for PyTables Software Library and Utilities:
+
+Copyright (c) 2002-2004 by Francesc Alted
+Copyright (c) 2005-2007 by Carabos Coop. V.
+Copyright (c) 2008-2010 by Francesc Alted +Copyright (c) 2011-2015 by PyTables maintainers +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +c. Neither the name of Francesc Alted nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/BLOSC.txt b/LICENSES/BLOSC.txt new file mode 100644 index 0000000..1226f91 --- /dev/null +++ b/LICENSES/BLOSC.txt @@ -0,0 +1,23 @@ +Blosc - A blocking, shuffling and lossless compression library + +Copyright (C) 2009-2012 Francesc Alted +Copyright (C) 2013 Francesc Alted + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/LICENSES/CLOUD-SPTHEME.txt b/LICENSES/CLOUD-SPTHEME.txt new file mode 100644 index 0000000..3144724 --- /dev/null +++ b/LICENSES/CLOUD-SPTHEME.txt @@ -0,0 +1,47 @@ +.. -*- restructuredtext -*- + +=================== +Copyright & License +=================== + +Cloud Sphinx Theme +================== +cloud_sptheme is released under the BSD license, +and is (c) `Assurance Technologies `_:: + + The "cloud_sptheme" python package and artwork is + Copyright (c) 2010-2012 by Assurance Technologies, LLC. + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Assurance Technologies, nor the names of the + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Other Content +============= +Most of the icons in ``cloud_sptheme:themes/cloud/static`` +are from the `Tango Icon Project `_, +which has released them into the Public Domain. diff --git a/LICENSES/FASTLZ.txt b/LICENSES/FASTLZ.txt new file mode 100644 index 0000000..4a6abd6 --- /dev/null +++ b/LICENSES/FASTLZ.txt @@ -0,0 +1,24 @@ +FastLZ - lightning-fast lossless compression library + +Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) +Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) +Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/LICENSES/H5PY.txt b/LICENSES/H5PY.txt new file mode 100644 index 0000000..081ef02 --- /dev/null +++ b/LICENSES/H5PY.txt @@ -0,0 +1,34 @@ +Copyright Notice and Statement for the h5py Project + +Copyright (c) 2008 Andrew Collette +http://www.h5py.org +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +a. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +c. Neither the name of the author nor the names of contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/LICENSES/HDF5.txt b/LICENSES/HDF5.txt new file mode 100644 index 0000000..54126de --- /dev/null +++ b/LICENSES/HDF5.txt @@ -0,0 +1,69 @@ +HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 2006-2007 by The HDF Group (THG). + +NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 1998-2006 by the Board of Trustees of the University of Illinois. + +All rights reserved. + +Contributors: National Center for Supercomputing Applications (NCSA) +at the University of Illinois, Fortner Software, Unidata Program +Center (netCDF), The Independent JPEG Group (JPEG), Jean-loup Gailly +and Mark Adler (gzip), and Digital Equipment Corporation (DEC). + +Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial +purposes) provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright +notice, this list of conditions, and the following disclaimer. + 2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions, and the following +disclaimer in the documentation and/or materials provided with the +distribution. + 3. In addition, redistributions of modified forms of the source or +binary code must carry prominent notices stating that the original +code was changed and the date of the change. + 4. All publications or advertising materials mentioning features or +use of this software are asked, but not required, to acknowledge that +it was developed by The HDF Group and by the National Center for +Supercomputing Applications at the University of Illinois at +Urbana-Champaign and credit the contributors. + 5. Neither the name of The HDF Group, the name of the University, +nor the name of any Contributor may be used to endorse or promote +products derived from this software without specific prior written +permission from THG, the University, or the Contributor, respectively. + +DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE HDF GROUP (THG) AND THE +CONTRIBUTORS "AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR +IMPLIED. 
In no event shall THG or the Contributors be liable for any +damages suffered by the users arising out of the use of this software, +even if advised of the possibility of such damage. + +Portions of HDF5 were developed with support from the University of +California, Lawrence Livermore National Laboratory (UC LLNL). The +following statement applies to those portions of the product and must +be retained in any redistribution of source code, binaries, +documentation, and/or accompanying materials: + +This work was partially produced at the University of California, +Lawrence Livermore National Laboratory (UC LLNL) under contract +no. W-7405-ENG-48 (Contract 48) between the U.S. Department of Energy +(DOE) and The Regents of the University of California (University) for +the operation of UC LLNL. + +DISCLAIMER: This work was prepared as an account of work sponsored by +an agency of the United States Government. Neither the United States +Government nor the University of California nor any of their +employees, makes any warranty, express or implied, or assumes any +liability or responsibility for the accuracy, completeness, or +usefulness of any information, apparatus, product, or process +disclosed, or represents that its use would not infringe privately- +owned rights. Reference herein to any specific commercial products, +process, or service by trade name, trademark, manufacturer, or +otherwise, does not necessarily constitute or imply its endorsement, +recommendation, or favoring by the United States Government or the +University of California. The views and opinions of authors expressed +herein do not necessarily state or reflect those of the United States +Government or the University of California, and shall not be used for +advertising or product endorsement purposes. diff --git a/LICENSES/LZ4.txt b/LICENSES/LZ4.txt new file mode 100644 index 0000000..2383e10 --- /dev/null +++ b/LICENSES/LZ4.txt @@ -0,0 +1,32 @@ +LZ4 - Fast LZ compression algorithm + +Copyright (C) 2011-2014, Yann Collet. +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +You can contact the author at : +- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html +- LZ4 source repository : http://code.google.com/p/lz4/ + diff --git a/LICENSES/SNAPPY.txt b/LICENSES/SNAPPY.txt new file mode 100644 index 0000000..8d6bd9f --- /dev/null +++ b/LICENSES/SNAPPY.txt @@ -0,0 +1,28 @@ +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/STDINT.txt b/LICENSES/STDINT.txt new file mode 100644 index 0000000..486e694 --- /dev/null +++ b/LICENSES/STDINT.txt @@ -0,0 +1,26 @@ +Copyright (c) 2006-2013 Alexander Chemeris + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the product nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/LICENSES/WIN32PTHREADS.txt b/LICENSES/WIN32PTHREADS.txt new file mode 100644 index 0000000..bd5ced5 --- /dev/null +++ b/LICENSES/WIN32PTHREADS.txt @@ -0,0 +1,19 @@ +Copyright (C) 2009 Andrzej K. Haczewski + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/LICENSES/ZLIB.txt b/LICENSES/ZLIB.txt new file mode 100644 index 0000000..5d74f5c --- /dev/null +++ b/LICENSES/ZLIB.txt @@ -0,0 +1,22 @@ +Copyright notice: + + (C) 1995-2013 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu diff --git a/LICENSES/ZSTD.TXT b/LICENSES/ZSTD.TXT new file mode 100644 index 0000000..a793a80 --- /dev/null +++ b/LICENSES/ZSTD.TXT @@ -0,0 +1,30 @@ +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..e75619d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,28 @@ +include MANIFEST.in +include *.txt THANKS README.rst +include setup.py setup.cfg VERSION Makefile cpuinfo.py + +recursive-include tables *.py *.pyx *.pxd *.c +recursive-include tables/tests *.h5 *.mat +recursive-include tables/nodes/tests *.h5 *.dat *.xbm +recursive-include src *.c *.h Makefile + +include hdf5-blosc/src/blosc_filter.? +recursive-include c-blosc/blosc *.c *.h +recursive-include c-blosc/internal-complibs *.c *.cc *.h + +recursive-include LICENSES * +recursive-include utils * +include doc/Makefile doc/make.bat +#include doc/*.pdf +recursive-include doc *.rst *.conf *.py *.*_t +recursive-include doc *.html *.js *.css *.png *.ico +recursive-include doc/source *.pdf objecttree.svg +#recursive-include doc/source *.pdf *.svg +recursive-include doc/html *.txt *.svg *.gif *.inv +recursive-include doc/scripts *.py +recursive-include doc/sphinxext * +recursive-exclude doc/build * +recursive-include examples *.py *.sh +recursive-include bench *.sh *.py *.txt *.h5 *.gnuplot +recursive-include contrib README *.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bc2b685 --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +# This Makefile is only intended to prepare for distribution the PyTables +# sources exported from a repository. For building and installing PyTables, +# please use ``setup.py`` as described in the ``README.rst`` file. + +VERSION = $(shell cat VERSION) +SRCDIRS = src doc +GENERATED = ANNOUNCE.txt +PYTHON = python3 +PYPLATFORM = $(shell $(PYTHON) -c "from distutils.util import get_platform; print(get_platform())") +PYVER = $(shell $(PYTHON) -V 2>&1 | cut -c 8-10) +PYBUILDDIR = $(PWD)/build/lib.$(PYPLATFORM)-$(PYVER) +OPT = PYTHONPATH=$(PYBUILDDIR) + + +.PHONY: all dist build check heavycheck clean distclean html + +all: $(GENERATED) build + for srcdir in $(SRCDIRS) ; do $(MAKE) -C $$srcdir $(OPT) $@ ; done + +dist: all + $(PYTHON) setup.py sdist + cd dist && md5sum tables-$(VERSION).tar.gz > pytables-$(VERSION).md5 && cd - + cp RELEASE_NOTES.txt dist/RELEASE_NOTES-$(VERSION).txt + for srcdir in $(SRCDIRS) ; do $(MAKE) -C $$srcdir $(OPT) $@ ; done + +clean: + rm -rf MANIFEST build dist tmp tables/__pycache__ + rm -rf bench/*.h5 bench/*.prof + rm -rf examples/*.h5 examples/raw + rm -f $(GENERATED) tables/*.so a.out + find . 
'(' -name '*.py[co]' -o -name '*~' ')' -exec rm '{}' ';'
+	for srcdir in $(SRCDIRS) ; do $(MAKE) -C $$srcdir $(OPT) $@ ; done
+
+distclean: clean
+	for srcdir in $(SRCDIRS) ; do $(MAKE) -C $$srcdir $(OPT) $@ ; done
+	rm -f tables/_comp_*.c tables/*extension.c
+	#git clean -fdx
+
+html: build
+	$(MAKE) -C doc $(OPT) html
+
+%: %.in VERSION
+	cat "$<" | sed -e 's/@VERSION@/$(VERSION)/g' > "$@"
+
+build:
+	$(PYTHON) setup.py build
+
+check: build
+	cd build/lib.*-$(PYVER) && env PYTHONPATH=. $(PYTHON) tables/tests/test_all.py
+
+heavycheck: build
+	cd build/lib.*-$(PYVER) && env PYTHONPATH=. $(PYTHON) tables/tests/test_all.py --heavy
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..6bb5235
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,152 @@
+===========================================
+ PyTables: hierarchical datasets in Python
+===========================================
+
+.. image:: https://badges.gitter.im/Join%20Chat.svg
+   :alt: Join the chat at https://gitter.im/PyTables/PyTables
+   :target: https://gitter.im/PyTables/PyTables
+
+.. image:: https://travis-ci.org/PyTables/PyTables.svg?branch=develop
+   :target: https://travis-ci.org/PyTables/PyTables
+
+.. image:: https://ci.appveyor.com/api/projects/status/github/PyTables/PyTables?branch=develop&svg=true
+   :target: https://ci.appveyor.com/project/PyTablesCI/pytables
+
+.. image:: https://codeclimate.com/github/PyTables/PyTables/badges/gpa.svg
+   :target: https://codeclimate.com/github/PyTables/PyTables
+   :alt: Code Climate
+
+:URL: http://www.pytables.org/
+
+
+PyTables is a package for managing hierarchical datasets, designed to
+efficiently cope with extremely large amounts of data.
+
+It is built on top of the HDF5 library and the NumPy package.  It
+features an object-oriented interface that, combined with C extensions
+for the performance-critical parts of the code (generated using
+Cython), makes it a fast yet extremely easy-to-use tool for
+interactively saving and retrieving very large amounts of data.  One
+important feature of PyTables is that it optimizes memory and disk
+resources so that data takes much less space (a factor of 3 to 5, and
+more if the data is compressible) than other solutions, such as
+relational or object-oriented databases.
+
+State-of-the-art compression
+----------------------------
+
+PyTables comes with out-of-the-box support for the `Blosc compressor `_.
+This allows for extremely high compression speed, while keeping decent
+compression ratios.  By doing so, I/O can be accelerated to a large
+extent, and you may end up achieving higher performance than the raw
+bandwidth provided by your I/O subsystem.  See the `Tuning The
+Chunksize section of the Optimization Tips chapter `_ of the user
+documentation for some benchmarks.
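+For instance, compression is requested by passing a ``Filters``
+instance when a dataset is created.  The following sketch shows one
+way to create a Blosc-compressed array (the file and dataset names
+are only illustrative)::
+
+    import numpy as np
+    import tables as tb
+
+    with tb.open_file("compressed.h5", "w") as f:
+        # complevel selects speed vs. ratio; shuffle helps numeric data
+        filters = tb.Filters(complevel=5, complib="blosc", shuffle=True)
+        f.create_carray("/", "data", obj=np.arange(1000000),
+                        filters=filters)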
+Not a RDBMS replacement
+-----------------------
+
+PyTables is not designed to work as a relational database replacement,
+but rather as a teammate.  If you want to work with large datasets of
+multidimensional data (for example, for multidimensional analysis), or
+just provide a categorized structure for some portions of your
+cluttered RDBMS, then give PyTables a try.  It works well for storing
+data from data acquisition systems (DAS), simulation software, network
+data monitoring systems (for example, traffic measurements of IP
+packets on routers), or as a centralized repository for system logs,
+to name only a few possible uses.
+
+Tables
+------
+
+A table is defined as a collection of records whose values are stored
+in fixed-length fields.  All records have the same structure and all
+values in each field have the same data type.  The terms "fixed-length"
+and strict "data types" seem to be quite a strange requirement for an
+interpreted language like Python, but they serve a useful function if
+the goal is to save very large quantities of data (such as that
+generated by many scientific applications, for example) in an
+efficient manner that reduces demand on CPU time and I/O.
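+As a sketch of what this looks like in code (the ``Particle``
+description and file name are only illustrative)::
+
+    import tables as tb
+
+    class Particle(tb.IsDescription):
+        name = tb.StringCol(16)   # 16-character string
+        energy = tb.Float64Col()  # double-precision float
+
+    with tb.open_file("particles.h5", "w") as f:
+        table = f.create_table("/", "particles", Particle)
+        row = table.row
+        for i in range(10):
+            row["name"] = "particle: %6d" % i
+            row["energy"] = float(i * i)
+            row.append()     # buffer the record
+        table.flush()        # write buffered records to disk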
+Arrays
+------
+
+There are other useful objects, like arrays, enlargeable arrays and
+variable-length arrays, that can serve different purposes in your
+project.
+
+Easy to use
+-----------
+
+One of the principal objectives of PyTables is to be user-friendly.
+In addition, many different iterators have been implemented so as to
+make interactive work as productive as possible.
+
+Platforms
+---------
+
+We are using Linux on top of Intel32 and Intel64 boxes as the main
+development platforms, but PyTables should be easy to compile/install
+on other UNIX or Windows machines.
+
+Compiling
+---------
+
+To compile PyTables you will need at least a recent version of the
+HDF5 (C flavor) library, the Zlib compression library, and the NumPy
+and Numexpr packages.  It also comes with support for the Blosc, LZO
+and bzip2 compressor libraries.  Blosc is mandatory, but PyTables
+ships with the Blosc sources, so although it is recommended to have
+Blosc installed on your system, you don't absolutely need to install
+it separately.  The LZO and bzip2 compression libraries are, however,
+optional.
+
+Installation
+------------
+
+1. Make sure you have HDF5 version 1.8.4 or above.
+
+   On OSX you can install HDF5 using `Homebrew `_::
+
+       $ brew tap homebrew/science
+       $ brew install hdf5
+
+   On Ubuntu::
+
+       $ sudo apt-get install libhdf5-serial-dev
+
+   If you have the HDF5 library in some non-standard location (that
+   is, where the compiler and the linker can't find it) you can use
+   the environment variable `HDF5_DIR` to specify its location.  See
+   `the manual `_ for more details.
+
+2. For stability (and performance) reasons, it is strongly
+   recommended that you install the C-Blosc library separately,
+   although you might want PyTables to use its internal C-Blosc
+   sources.
+
+3. Optionally, consider installing the LZO compression library and/or
+   the bzip2 compression library.
+
+4. Install!::
+
+       $ pip install tables
+
+5. To run the test suite, run::
+
+       $ python -m tables.tests.test_all
+
+   If some test does not pass, please send the complete test output
+   back to us.
+
+
+**Enjoy data!** -- The PyTables Team
+
+.. Local Variables:
+.. mode: text
+.. coding: utf-8
+.. fill-column: 70
+.. End:
diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt
new file mode 100644
index 0000000..226c0f9
--- /dev/null
+++ b/RELEASE_NOTES.txt
@@ -0,0 +1,112 @@
+=======================================
+ Release notes for PyTables 3.4 series
+=======================================
+
+:Author: PyTables Developers
+:Contact: pytables-dev@googlegroups.com
+
+.. py:currentmodule:: tables
+
+Changes from 3.4.3 to 3.4.4
+===========================
+
+Improvements
+------------
+ - Environment variable to control the use of embedded libraries.
+   Thanks to avalentino.
+ - Include citation in repository.
+   :issue:`690`.  Thanks to katrinleinweber.
+
+Bugs fixed
+----------
+ - Fixed import error with numexpr 2.6.5.dev0.
+   :issue:`685`.  Thanks to cgohlke.
+ - Fixed linter warnings.
+   Thanks to avalentino.
+ - Fixed use of re.split() in version detection.
+   :issue:`687`.  Thanks to mingwandroid.
+ - Fixed test failures with Python 2.7 and NumPy 1.14.3.
+   :issue:`688` & :issue:`689`.  Thanks to oleksandr-pavlyk.
+
+
+Changes from 3.4.2 to 3.4.3
+===========================
+
+Improvements
+------------
+ - In interactive Python sessions, the group/attribute `__dir__()`
+   method autocompletes children that are named as valid Python
+   identifiers.  :issue:`624` & :issue:`625`.  Thanks to ankostis.
+ - Implement `Group.__getitem__()` to have groups act as Python
+   containers, so code like this works: ``hfile.root['some child']``.
+   :issue:`628`.  Thanks to ankostis.
+ - Enable building with the Intel compiler (icc/icpc).
+   Thanks to rohit-jamuar.
+ - PEP 519 support, using the new `os.fspath` method.
+   Thanks to mruffalo.
+ - Optionally disable recording of ctime (metadata creation time) when
+   creating datasets, which makes it possible to get bitwise-identical
+   output from repeated runs.
+   Thanks to alex-cobb.
+ - Prevent reading all rows for each coordinate in a VLArray when
+   indexing using a list.
+   Thanks to igormq.
+ - Internal Blosc version updated to 1.14.3.
+
+Bugs fixed
+----------
+ - Fixed division by zero when using `_convert_time64()` with an empty
+   nparr array.
+   :issue:`653`.  Thanks to alobbs.
+ - Fixed deprecation warnings with NumPy 1.14.
+   Thanks to oleksandr-pavlyk.
+ - Skip DLL check when running from a frozen app.
+   :issue:`675`.  Thanks to jwiggins.
+ - Fixed behaviour with slices out of range.
+   :issue:`651`.  Thanks to jackdbd.
+
+
+Changes from 3.4.1 to 3.4.2
+===========================
+
+Improvements
+------------
+ - setup.py detects a conda env and uses installed conda (hdf5, bzip2,
+   lzo and/or blosc) packages when building from source.
+
+Bugs fixed
+----------
+ - Linux wheels are now built against the built-in Blosc.
+ - Fixed Windows absolute paths in ptrepack, ptdump, ptree.
+   :issue:`616`.  Thanks to oscar6echo.
+
+
+Changes from 3.4.0 to 3.4.1
+===========================
+
+Bugs fixed
+----------
+ - Fixed a bug in ptrepack.
+
+
+Changes from 3.3.0 to 3.4.0
+===========================
+
+Improvements
+------------
+ - Support for HDF5 v1.10.x (see :issue:`582`).
+ - Fix compatibility with the upcoming Python 2.7.13, 3.5.3 and 3.6
+   versions.  See also :issue:`590`.  Thanks to Yaroslav Halchenko.
+ - Internal Blosc version updated to 1.11.3.
+ - Gracefully handle cpuinfo failure.  (PR #578)
+   Thanks to Zbigniew Jędrzejewski-Szmek.
+ - Update internal py-cpuinfo to 3.3.0.  Thanks to Gustavo Serra Scalet.
+
+Bugs fixed
----------
+ - Fix conversion of the Python 2 `long` type to `six.integer_types`
+   in atom.py.  See also :issue:`598`.  Thanks to Kyle Keppler for
+   reporting.
+ - Fix an important bug in the bitshuffle filter of the internal Blosc
+   on big-endian machines.  See also :issue:`583`.
+ - Allow for long type in nextafter.  (PR #587)  Thanks to Yaroslav
+   Halchenko.
+ - Fix unicode bug in group and table names.  :issue:`514`
diff --git a/THANKS b/THANKS
new file mode 100644
index 0000000..0ba6df4
--- /dev/null
+++ b/THANKS
@@ -0,0 +1,86 @@
+March 2009
+
+We would like to thank the people who have contributed directly or
+indirectly to PyTables.
+
+Scott Prater for editing the user's manual in order to make it more
+readable in English, as well as conducting the tests of PyTables on
+Solaris.
+
+Alan McIntyre for porting PyTables to Windows.
+
+John Nielsen for suggesting improvements and delivering code to
+completely avoid the recursion algorithms, allowing PyTables to bypass
+the ~1000 levels of depth that the Python recursion limit imposed.
+
+Tom Hedley for providing a nice patch for supporting complex datatypes
+for Arrays, EArrays and VLArrays.  This was the root for the support of
+complex types in Tables as well.
+
+Shack Toms for providing a Python version of the nextafter and
+nextafterf math functions which, despite being part of the C99
+standard, are not available in Microsoft VC++ 6.x or VC++ 7.x.
+
+Jeff Whitaker for providing the NetCDF module and the utility for
+converting netCDF files to HDF5 (nctoh5).
+
+Norbert Nemec for providing several interesting patches.
+
+Andrew Straw for suggesting bracketing the most intensive parts of
+PyTables with BEGIN_ALLOW_THREADS and END_ALLOW_THREADS.  This allows
+much better performance of PyTables apps on multiprocessor platforms.
+
+Antonio Valentino for providing several patches for supporting
+native multidimensional attributes and the CArray object.
+
+Ashley Walsh, for reporting several problems and fixes, which helped
+in testing the OSX platform, especially UCL compressor issues.
+
+Russel Howe, for reporting and providing an initial patch for a nasty
+memory leak when reading VLArray types.
+
+The HDF5 team at NCSA for making such an excellent library for data
+persistence, and especially Pedro Vicente, Quincey Koziol and Elena
+Pourmal, for quickly including my suggested patches to the HDF5_HL and
+solving the reported bugs in the HDF5 library.
+
+Todd Miller and Perry Greenfield for promptly helping me to understand
+many of the intricacies of the numarray package, and Jin-chung Hsu for
+discussions on the recarray module (now the numarray.records module).
+They have been very receptive and promptly worked out most of the
+improvements in numarray (especially in the records module) that were
+necessary for PyTables.
+
+Travis Oliphant for his impressive work and responsiveness with NumPy.
+
+Evan Prodromou for his lrucache package, a very sleek implementation
+of an LRU queue.  He had a very helpful attitude with the licensing
+and technical issues.
+
+Gerard Vermeulen for Windows/MSVS-2005 testing.
+
+Enric Cervera for testing the binaries for MacOSX/Intel.
+
+Daniel Bungert, Steve Langasek and Alexandre Fayolle for their support
+in creating Debian packages for PyTables.
+
+Greg Ewing for writing the excellent Pyrex tool, which allows beginners
+like me to quickly and safely start writing Python extensions.  He was
+also very responsive about questions on Pyrex.
+
+Stefan Behnel, Robert Bradshaw, and Dag Sverre Seljebotn for their
+impressive work with Cython.
+
+Andrew Collette, for his excellent work on the h5py project, from
+which PyTables has started to steal ideas (and code too ;-).
+
+Guido, you know who.
+
+And last, but definitely not least!,
+
+To those companies that are supporting the PyTables project with
+contracts.
+
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..f989260
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+3.4.4
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..72621cb
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,77 @@
+environment:
+  PACKAGE_NAME: tables
+  BUILD_DEPENDS: "numpy>=1.8.0 numexpr>=2.5.2 six cython bzip2"
+  TEST_DEPENDS: "numpy>=1.8.0 numexpr>=2.5.2 six mock"
+  # Make setup.py include conda-specific DLLs in the wheel
+  BUILDWHEEL: True
+
+  global:
+    # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
+    # /E:ON and /V:ON options are not enabled in the batch script interpreter
+    # See: http://stackoverflow.com/a/13751649/163740
+    # From: https://github.com/astropy/ci-helpers/blob/master/appveyor/windows_sdk.cmd
+    CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\appveyor\\windows_sdk.cmd"
+
+  matrix:
+#    - PYTHON: "C:\\Miniconda"
+#      PYTHON_VERSION: "2.7"
+#      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Miniconda-x64"
+      PYTHON_VERSION: "2.7"
+      PYTHON_ARCH: "64"
+      HDF5_VERSION: "1.8"
+
+#    - PYTHON: "C:\\Miniconda-x64"
+#      PYTHON_VERSION: "2.7"
+#      PYTHON_ARCH: "64"
+#      HDF5_VERSION: "1.10"
+
+    - PYTHON: "C:\\Miniconda36-x64"
+      PYTHON_VERSION: "3.6"
+      PYTHON_ARCH: "64"
+      HDF5_VERSION: "1.8"
+
+    - PYTHON: "C:\\Miniconda36-x64"
+      PYTHON_VERSION: "3.6"
+      PYTHON_ARCH: "64"
+      HDF5_VERSION: "1.10"
+
+install:
+  # this installs the appropriate Miniconda (Py2/Py3, 32/64 bit),
+  # as well as pip, conda-build, and the binstar CLI
+  #- powershell .\\ci\\appveyor\\install.ps1
+  - powershell .\\ci\\appveyor\\missing-headers.ps1
+  - "set PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
+
+build_script:
+  - conda config --add channels conda-forge
+  - conda create --yes -n build_env python=%PYTHON_VERSION% %BUILD_DEPENDS%
+  - activate build_env
+
+  # Install HDF5 Library
+  # Add tomkooij channel for HDF5 v1.10 conda packages
+  - conda config --add channels http://conda.anaconda.org/tomkooij
+  - conda install --yes hdf5=%HDF5_VERSION%
+
+  # Build wheel
+  - "%CMD_IN_ENV% python setup.py bdist_wheel"
+
+test_script:
+  # create test env
+  - conda create --yes -n test_env python=%PYTHON_VERSION% %TEST_DEPENDS%
+  - activate test_env
+
+  # install from wheel
+  - pip install --no-index --find-links dist/ %PACKAGE_NAME%
+
+  - cd ..
+  - python -m tables.tests.test_all
+
+  - pt2to3 -h
+  - ptrepack -h
+  - ptdump -h
+  - pttree -h
+
+artifacts:
+  - path: "dist\\*"
diff --git a/bench/LRU-experiments.py b/bench/LRU-experiments.py
new file mode 100644
index 0000000..f15e9de
--- /dev/null
+++ b/bench/LRU-experiments.py
@@ -0,0 +1,98 @@
+# Testbed to perform experiments in order to determine the best values
+# for the node numbers in the LRU cache.  Tables version.
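+# (The node-cache size being explored here can be set per file at open
+# time, e.g. tables.open_file(filename, node_cache_slots=256); the
+# default comes from tables.parameters.NODE_CACHE_SLOTS.  See also
+# LRUcache-node-bench.py below.)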
+
+from __future__ import print_function
+from time import time
+from tables import *
+import tables
+
+print("PyTables version-->", tables.__version__)
+
+filename = "/tmp/junk-tables-100.h5"
+NLEAVES = 2000
+NROWS = 1000
+
+
+class Particle(IsDescription):
+    name = StringCol(16, pos=1)      # 16-character string
+    lati = Int32Col(pos=2)           # integer
+    longi = Int32Col(pos=3)          # integer
+    pressure = Float32Col(pos=4)     # float (single-precision)
+    temperature = Float64Col(pos=5)  # double (double-precision)
+
+
+def create_junk():
+    # Open a file in "w"rite mode
+    fileh = open_file(filename, mode="w")
+    # Create a new group
+    group = fileh.create_group(fileh.root, "newgroup")
+
+    for i in range(NLEAVES):
+        # Create a new table in the newgroup group
+        table = fileh.create_table(group, 'table' + str(i), Particle,
+                                   "A table", Filters(1))
+        particle = table.row
+        print("Creating table-->", table._v_name)
+
+        # Fill the table with particles
+        for i in range(NROWS):
+            # This appends a row with the default column values.
+            particle.append()
+        table.flush()
+
+    # Finally, close the file
+    fileh.close()
+
+
+def modify_junk_LRU():
+    fileh = open_file(filename, 'a')
+    group = fileh.root.newgroup
+    for j in range(5):
+        print("iter -->", j)
+        for tt in fileh.walk_nodes(group):
+            if isinstance(tt, Table):
+                pass
+#                for row in tt:
+#                    pass
+    fileh.close()
+
+
+def modify_junk_LRU2():
+    fileh = open_file(filename, 'a')
+    group = fileh.root.newgroup
+    for j in range(20):
+        t1 = time()
+        for i in range(100):
+            #print("table-->", tt._v_name)
+            tt = getattr(group, "table" + str(i))
+            #for row in tt:
+            #    pass
+        print("iter and time -->", j + 1, round(time() - t1, 3))
+    fileh.close()
+
+
+def modify_junk_LRU3():
+    fileh = open_file(filename, 'a')
+    group = fileh.root.newgroup
+    for j in range(3):
+        t1 = time()
+        for tt in fileh.walk_nodes(group, "Table"):
+            tt.attrs.TITLE
+            for row in tt:
+                pass
+        print("iter and time -->", j + 1, round(time() - t1, 3))
+    fileh.close()
+
+if 1:
+    # create_junk()
+    # modify_junk_LRU()   # uses the iterator version (walk_nodes)
+    # modify_junk_LRU2()  # uses a regular loop (getattr)
+    modify_junk_LRU3()    # walks tables, reading attributes and rows
+else:
+    import profile
+    import pstats
+    profile.run('modify_junk_LRU2()', 'modify.prof')
+    stats = pstats.Stats('modify.prof')
+    stats.strip_dirs()
+    stats.sort_stats('time', 'calls')
+    stats.print_stats()
diff --git a/bench/LRU-experiments2.py b/bench/LRU-experiments2.py
new file mode 100644
index 0000000..564352b
--- /dev/null
+++ b/bench/LRU-experiments2.py
@@ -0,0 +1,57 @@
+# Testbed to perform experiments in order to determine the best values
+# for the node numbers in the LRU cache.  Arrays version.
+
+from __future__ import print_function
+from time import time
+import tables
+
+print("PyTables version-->", tables.__version__)
+
+filename = "/tmp/junk-array.h5"
+NOBJS = 1000
+
+
+def create_junk():
+    fileh = tables.open_file(filename, mode="w")
+    for i in range(NOBJS):
+        fileh.create_array(fileh.root, 'array' + str(i), [1])
+    fileh.close()
+
+
+def modify_junk_LRU():
+    fileh = tables.open_file(filename, 'a')
+    group = fileh.root
+    for j in range(5):
+        print("iter -->", j)
+        for tt in fileh.walk_nodes(group):
+            if isinstance(tt, tables.Array):
+#                d = tt.read()
+                pass
+
+    fileh.close()
+
+
+def modify_junk_LRU2():
+    fileh = tables.open_file(filename, 'a')
+    group = fileh.root
+    for j in range(5):
+        t1 = time()
+        for i in range(100):  # The number
+            #print("table-->", tt._v_name)
+            tt = getattr(group, "array" + str(i))
+            #d = tt.read()
+        print("iter and time -->", j + 1, round(time() - t1, 3))
+    fileh.close()
+
+if 1:
+    # create_junk()
+    # modify_junk_LRU()  # uses the iterator version (walk_nodes)
+    modify_junk_LRU2()   # uses a regular loop (getattr)
+else:
+    import profile
+    import pstats
+    profile.run('modify_junk_LRU2()', 'modify.prof')
+    stats = pstats.Stats('modify.prof')
+    stats.strip_dirs()
+    stats.sort_stats('time', 'calls')
+    stats.print_stats()
diff --git a/bench/LRUcache-node-bench.py b/bench/LRUcache-node-bench.py
new file mode 100644
index 0000000..eb5e51f
--- /dev/null
+++ b/bench/LRUcache-node-bench.py
@@ -0,0 +1,80 @@
+from __future__ import print_function
+
+import sys
+import numpy
+import tables
+from time import time
+#import psyco
+
+filename = "/tmp/LRU-bench.h5"
+nodespergroup = 250
+niter = 100
+
+print('nodespergroup:', nodespergroup)
+print('niter:', niter)
+
+if len(sys.argv) > 1:
+    NODE_CACHE_SLOTS = int(sys.argv[1])
+    print('NODE_CACHE_SLOTS:', NODE_CACHE_SLOTS)
+else:
+    NODE_CACHE_SLOTS = tables.parameters.NODE_CACHE_SLOTS
+f = tables.open_file(filename, "w", node_cache_slots=NODE_CACHE_SLOTS)
+g = f.create_group("/", "NodeContainer")
+print("Creating nodes")
+for i in range(nodespergroup):
+    f.create_array(g, "arr%d" % i, [i])
+f.close()
+
+f = tables.open_file(filename)
+
+
+def iternodes():
+#    for a in f.root.NodeContainer:
+#        pass
+    indices = numpy.random.randn(nodespergroup * niter) * \
+        30 + nodespergroup / 2.
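+    # Draw normally-distributed node indices centred on the middle of
+    # the group (clipped to the valid range below), so that some nodes
+    # are requested much more often than others and the LRU eviction
+    # policy is actually exercised.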
+    indices = indices.astype('i4').clip(0, nodespergroup - 1)
+    g = f.get_node("/", "NodeContainer")
+    for i in indices:
+        a = f.get_node(g, "arr%d" % i)
+        # print("a-->", a)
+
+print("reading nodes...")
+# First iteration (put in LRU cache)
+t1 = time()
+for a in f.root.NodeContainer:
+    pass
+print("time (init cache)-->", round(time() - t1, 3))
+
+
+def timeLRU():
+    # Next iterations
+    t1 = time()
+#    for i in range(niter):
+#        iternodes()
+    iternodes()
+    print("time (from cache)-->", round((time() - t1) / niter, 3))
+
+
+def profile(verbose=False):
+    import pstats
+    import cProfile as prof
+    prof.run('timeLRU()', 'out.prof')
+    stats = pstats.Stats('out.prof')
+    stats.strip_dirs()
+    stats.sort_stats('time', 'calls')
+    if verbose:
+        stats.print_stats()
+    else:
+        stats.print_stats(20)
+
+# profile()
+# psyco.bind(timeLRU)
+timeLRU()
+
+f.close()
+
+# for N in 0 4 8 16 32 64 128 256 512 1024 2048 4096; do
+#     env PYTHONPATH=../build/lib.linux-x86_64-2.7 \
+#         python LRUcache-node-bench.py $N;
+# done
diff --git a/bench/bench-postgres-ranges.sh b/bench/bench-postgres-ranges.sh
new file mode 100755
index 0000000..be72475
--- /dev/null
+++ b/bench/bench-postgres-ranges.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+export PYTHONPATH=..${PYTHONPATH:+:$PYTHONPATH}
+
+pyopt="-O -u"
+#qlvl="-Q8 -x"
+#qlvl="-Q8"
+qlvl="-Q7"
+#size="500m"
+size="1g"
+
+#python $pyopt indexed_search.py -P -c -n $size -m -v
+python $pyopt indexed_search.py -P -i -n $size -m -v -sfloat $qlvl
+
diff --git a/bench/bench-pytables-ranges.sh b/bench/bench-pytables-ranges.sh
new file mode 100755
index 0000000..d73efdf
--- /dev/null
+++ b/bench/bench-pytables-ranges.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+#export LD_LIBRARY_PATH=$HOME/computacio/hdf5-1.8.2/hdf5/lib
+export PYTHONPATH=..${PYTHONPATH:+:$PYTHONPATH}
+
+bench="python2.7 -O -u indexed_search.py"
+flags="-T -m -v "
+#sizes="1g 500m 200m 100m 50m 20m 10m 5m 2m 1m"
+sizes="1g"
+#sizes="1m"
+working_dir="data.nobackup"
+#working_dir="/scratch2/faltet"
+
+#for comprlvl in '-z0' '-z1 -llzo' '-z1 -lzlib' ; do
+#for comprlvl in '-z6 -lblosc' '-z3 -lblosc' '-z1 -lblosc' ; do
+for comprlvl in '-z5 -lblosc' ; do
+#for comprlvl in '-z0' ; do
+    for optlvl in '-tfull -O9' ; do
+    #for optlvl in '-tultralight -O3' '-tlight -O6' '-tmedium -O6' '-tfull -O9'; do
+    #for optlvl in '-tultralight -O3'; do
+        #rm -f $working_dir/*  # XXX is this placed correctly?
+        for mode in '-Q8 -i -s float' ; do
+        #for mode in -c '-Q7 -i -s float' ; do
+        #for mode in '-c -s float' '-Q8 -I -s float' '-Q8 -S -s float'; do
+            for size in $sizes ; do
+                $bench $flags $mode -n $size $optlvl $comprlvl -d $working_dir
+            done
+        done
+    done
+done
diff --git a/bench/bench-pytables.sh b/bench/bench-pytables.sh
new file mode 100755
index 0000000..caa620b
--- /dev/null
+++ b/bench/bench-pytables.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+export LD_LIBRARY_PATH=$HOME/computacio/hdf5-1.8.1/hdf5/lib
+#export PYTHONPATH=..${PYTHONPATH:+:$PYTHONPATH}
+
+bench="python2.7 -O -u indexed_search.py"
+flags="-T -m -v -d data.nobackup"
+#sizes="1m 2m 5m 10m 20m 50m 100m 200m 500m 1g"
+sizes="2g 1g 500m 200m 100m 50m 20m 10m 5m 2m 1m 500k 200k 100k 50k 20k 10k 5k 2k 1k"
+#sizes="1m 100k"
+
+#for optimlvl in 0 1 2 3 4 5 6 7 8 9 ; do
+for idxtype in ultralight light medium full; do
+#for idxtype in medium full; do
+    for optimlvl in 0 3 6 9; do
+        for compropt in '' '-z1 -lzlib' '-z1 -llzo' ; do
+        #for compropt in '-z1 -llzo' ; do
+            rm -rf data.nobackup/*  # Warning: is this placed correctly?
+ #for mode in -c '-i -s float' ; do + for mode in -c '-i' ; do + for size in $sizes ; do + $bench $flags $mode -n $size -O $optimlvl -t $idxtype $compropt + done + done + done + done +done +rm -rf data.nobackup diff --git a/bench/blosc.py b/bench/blosc.py new file mode 100644 index 0000000..54b2c9b --- /dev/null +++ b/bench/blosc.py @@ -0,0 +1,165 @@ +from __future__ import print_function +import os +import sys +from time import time +import numpy as np +import tables as tb + + +niter = 3 +dirname = "/scratch2/faltet/blosc-data/" +#expression = "a**2 + b**3 + 2*a*b + 3" +#expression = "a+b" +#expression = "a**2 + 2*a/b + 3" +#expression = "(a+b)**2 - (a**2 + b**2 + 2*a*b) + 1.1" +expression = "3*a-2*b+1.1" +shuffle = True + + +def create_file(kind, prec, synth): + prefix_orig = 'cellzome/cellzome-' + iname = dirname + prefix_orig + 'none-' + prec + '.h5' + f = tb.open_file(iname, "r") + + if prec == "single": + type_ = tb.Float32Atom() + else: + type_ = tb.Float64Atom() + + if synth: + prefix = 'synth/synth-' + else: + prefix = 'cellzome/cellzome-' + + for clevel in range(10): + oname = '%s/%s-%s%d-%s.h5' % (dirname, prefix, kind, clevel, prec) + # print "creating...", iname + f2 = tb.open_file(oname, "w") + + if kind in ["none", "numpy"]: + filters = None + else: + filters = tb.Filters( + complib=kind, complevel=clevel, shuffle=shuffle) + + for name in ['maxarea', 'mascotscore']: + col = f.get_node('/', name) + r = f2.create_carray('/', name, type_, col.shape, filters=filters) + if synth: + r[:] = np.arange(col.nrows, dtype=type_.dtype) + else: + r[:] = col[:] + f2.close() + if clevel == 0: + size = 1.5 * float(os.stat(oname)[6]) + f.close() + return size + + +def create_synth(kind, prec): + + prefix_orig = 'cellzome/cellzome-' + iname = dirname + prefix_orig + 'none-' + prec + '.h5' + f = tb.open_file(iname, "r") + + if prec == "single": + type_ = tb.Float32Atom() + else: + type_ = tb.Float64Atom() + + prefix = 'synth/synth-' + for clevel in range(10): + oname = '%s/%s-%s%d-%s.h5' % (dirname, prefix, kind, clevel, prec) + # print "creating...", iname + f2 = tb.open_file(oname, "w") + + if kind in ["none", "numpy"]: + filters = None + else: + filters = tb.Filters( + complib=kind, complevel=clevel, shuffle=shuffle) + + for name in ['maxarea', 'mascotscore']: + col = f.get_node('/', name) + r = f2.create_carray('/', name, type_, col.shape, filters=filters) + if name == 'maxarea': + r[:] = np.arange(col.nrows, dtype=type_.dtype) + else: + r[:] = np.arange(col.nrows, 0, dtype=type_.dtype) + + f2.close() + if clevel == 0: + size = 1.5 * float(os.stat(oname)[6]) + f.close() + return size + + +def process_file(kind, prec, clevel, synth): + + if kind == "numpy": + lib = "none" + else: + lib = kind + if synth: + prefix = 'synth/synth-' + else: + prefix = 'cellzome/cellzome-' + iname = '%s/%s-%s%d-%s.h5' % (dirname, prefix, kind, clevel, prec) + f = tb.open_file(iname, "r") + a_ = f.root.maxarea + b_ = f.root.mascotscore + + oname = '%s/%s-%s%d-%s-r.h5' % (dirname, prefix, kind, clevel, prec) + f2 = tb.open_file(oname, "w") + if lib == "none": + filters = None + else: + filters = tb.Filters(complib=lib, complevel=clevel, shuffle=shuffle) + if prec == "single": + type_ = tb.Float32Atom() + else: + type_ = tb.Float64Atom() + r = f2.create_carray('/', 'r', type_, a_.shape, filters=filters) + + if kind == "numpy": + a2, b2 = a_[:], b_[:] + t0 = time() + r = eval(expression, {'a': a2, 'b': b2}) + print("%5.2f" % round(time() - t0, 3)) + else: + expr = tb.Expr(expression, {'a': a_, 'b': b_}) + 
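+        # tb.Expr evaluates the expression chunk by chunk (out of core), so
+        # neither the inputs nor the output has to fit in memory; set_output()
+        # below redirects the buffered result into the compressed carray `r`.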
expr.set_output(r) + expr.eval() + f.close() + f2.close() + size = float(os.stat(iname)[6]) + float(os.stat(oname)[6]) + return size + + +if __name__ == '__main__': + if len(sys.argv) > 3: + kind = sys.argv[1] + prec = sys.argv[2] + if sys.argv[3] == "synth": + synth = True + else: + synth = False + else: + print("3 parameters required") + sys.exit(1) + + # print "kind, precision, synth:", kind, prec, synth + + # print "Creating input files..." + size_orig = create_file(kind, prec, synth) + + # print "Processing files for compression levels in range(10)..." + for clevel in range(10): + t0 = time() + ts = [] + for i in range(niter): + size = process_file(kind, prec, clevel, synth) + ts.append(time() - t0) + t0 = time() + ratio = size_orig / size + print("%5.2f, %5.2f" % (round(min(ts), 3), ratio)) diff --git a/bench/bsddb-table-bench.py b/bench/bsddb-table-bench.py new file mode 100644 index 0000000..49c4f80 --- /dev/null +++ b/bench/bsddb-table-bench.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python +###### WARNING ####### +### This script is obsoleted ### +# If you get it working again, please drop me a line +# F. Alted 2004-01-27 + +from __future__ import print_function +import sys +import struct +import cPickle + +from tables import * +import numpy as np + +try: + # For Python 2.3 + from bsddb import db +except ImportError: + # For earlier Pythons w/distutils pybsddb + from bsddb3 import db +import psyco + + +# This class is accessible only for the examples +class Small(IsDescription): + """Record descriptor. + + A record has several columns. They are represented here as class + attributes, whose names are the column names and their values will + become their types. The IsColDescr class will take care the user + will not add any new variables and that its type is correct. 
+ + """ + + var1 = StringCol(itemsize=16) + var2 = Int32Col() + var3 = Float64Col() + +# Define a user record to characterize some kind of particles + + +class Medium(IsDescription): + name = StringCol(itemsize=16, pos=0) # 16-character String + #float1 = Float64Col(shape=2, dflt=2.3) + float1 = Float64Col(dflt=1.3, pos=1) + float2 = Float64Col(dflt=2.3, pos=2) + ADCcount = Int16Col(pos=3) # signed short integer + grid_i = Int32Col(pos=4) # integer + grid_j = Int32Col(pos=5) # integer + pressure = Float32Col(pos=6) # float (single-precision) + energy = Float64Col(pos=7) # double (double-precision) + +# Define a user record to characterize some kind of particles + + +class Big(IsDescription): + name = StringCol(itemsize=16) # 16-character String + #float1 = Float64Col(shape=32, dflt=np.arange(32)) + #float2 = Float64Col(shape=32, dflt=np.arange(32)) + float1 = Float64Col(shape=32, dflt=range(32)) + float2 = Float64Col(shape=32, dflt=[2.2] * 32) + ADCcount = Int16Col() # signed short integer + grid_i = Int32Col() # integer + grid_j = Int32Col() # integer + pressure = Float32Col() # float (single-precision) + energy = Float64Col() # double (double-precision) + + +def createFile(filename, totalrows, recsize, verbose): + + # Open a 'n'ew file + dd = db.DB() + if recsize == "big": + isrec = Description(Big) + elif recsize == "medium": + isrec = Medium() + else: + isrec = Description(Small) + # dd.set_re_len(struct.calcsize(isrec._v_fmt)) # fixed length records + dd.open(filename, db.DB_RECNO, db.DB_CREATE | db.DB_TRUNCATE) + + rowswritten = 0 + # Get the record object associated with the new table + if recsize == "big": + isrec = Big() + arr = np.array(np.arange(32), type=np.Float64) + arr2 = np.array(np.arange(32), type=np.Float64) + elif recsize == "medium": + isrec = Medium() + arr = np.array(np.arange(2), type=np.Float64) + else: + isrec = Small() + # print d + # Fill the table + if recsize == "big" or recsize == "medium": + d = {"name": " ", + "float1": 1.0, + "float2": 2.0, + "ADCcount": 12, + "grid_i": 1, + "grid_j": 1, + "pressure": 1.9, + "energy": 1.8, + } + for i in range(totalrows): + #d['name'] = 'Particle: %6d' % (i) + #d['TDCcount'] = i % 256 + d['ADCcount'] = (i * 256) % (1 << 16) + if recsize == "big": + #d.float1 = np.array([i]*32, np.Float64) + #d.float2 = np.array([i**2]*32, np.Float64) + arr[0] = 1.1 + d['float1'] = arr + arr2[0] = 2.2 + d['float2'] = arr2 + pass + else: + d['float1'] = float(i) + d['float2'] = float(i) + d['grid_i'] = i + d['grid_j'] = 10 - i + d['pressure'] = float(i * i) + d['energy'] = d['pressure'] + dd.append(cPickle.dumps(d)) +# dd.append(struct.pack(isrec._v_fmt, +# d['name'], d['float1'], d['float2'], +# d['ADCcount'], +# d['grid_i'], d['grid_j'], +# d['pressure'], d['energy'])) + else: + d = {"var1": " ", "var2": 1, "var3": 12.1e10} + for i in range(totalrows): + d['var1'] = str(i) + d['var2'] = i + d['var3'] = 12.1e10 + dd.append(cPickle.dumps(d)) + #dd.append( + # struct.pack(isrec._v_fmt, d['var1'], d['var2'], d['var3'])) + + rowswritten += totalrows + + # Close the file + dd.close() + return (rowswritten, struct.calcsize(isrec._v_fmt)) + + +def readFile(filename, recsize, verbose): + # Open the HDF5 file in read-only mode + #fileh = shelve.open(filename, "r") + dd = db.DB() + if recsize == "big": + isrec = Big() + elif recsize == "medium": + isrec = Medium() + else: + isrec = Small() + # dd.set_re_len(struct.calcsize(isrec._v_fmt)) # fixed length records + # dd.set_re_pad('-') # sets the pad character... 
+ # dd.set_re_pad(45) # ...test both int and char + dd.open(filename, db.DB_RECNO) + if recsize == "big" or recsize == "medium": + print(isrec._v_fmt) + c = dd.cursor() + rec = c.first() + e = [] + while rec: + record = cPickle.loads(rec[1]) + #record = struct.unpack(isrec._v_fmt, rec[1]) + # if verbose: + # print record + if record['grid_i'] < 20: + e.append(record['grid_j']) + # if record[4] < 20: + # e.append(record[5]) + rec = next(c) + else: + print(isrec._v_fmt) + #e = [ t[1] for t in fileh[table] if t[1] < 20 ] + c = dd.cursor() + rec = c.first() + e = [] + while rec: + record = cPickle.loads(rec[1]) + #record = struct.unpack(isrec._v_fmt, rec[1]) + # if verbose: + # print record + if record['var2'] < 20: + e.append(record['var1']) + # if record[1] < 20: + # e.append(record[2]) + rec = next(c) + + print("resulting selection list ==>", e) + print("last record read ==>", record) + print("Total selected records ==> ", len(e)) + + # Close the file (eventually destroy the extended type) + dd.close() + + +# Add code to test here +if __name__ == "__main__": + import getopt + import time + + usage = """usage: %s [-v] [-s recsize] [-i iterations] file + -v verbose + -s use [big] record, [medium] or [small] + -i sets the number of rows in each table\n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 's:vi:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + recsize = "medium" + iterations = 100 + verbose = 0 + + # Get the options + for option in opts: + if option[0] == '-s': + recsize = option[1] + if recsize not in ["big", "medium", "small"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == '-i': + iterations = int(option[1]) + elif option[0] == '-v': + verbose = 1 + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + t1 = time.clock() + psyco.bind(createFile) + (rowsw, rowsz) = createFile(file, iterations, recsize, verbose) + t2 = time.clock() + tapprows = round(t2 - t1, 3) + + t1 = time.clock() + psyco.bind(readFile) + readFile(file, recsize, verbose) + t2 = time.clock() + treadrows = round(t2 - t1, 3) + + print("Rows written:", rowsw, " Row size:", rowsz) + print("Time appending rows:", tapprows) + if tapprows > 0.: + print("Write rows/sec: ", int(iterations / float(tapprows))) + print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))) + print("Time reading rows:", treadrows) + if treadrows > 0.: + print("Read rows/sec: ", int(iterations / float(treadrows))) + print("Read KB/s :", int(rowsw * rowsz / (treadrows * 1024))) diff --git a/bench/cacheout.py b/bench/cacheout.py new file mode 100644 index 0000000..e68cbe5 --- /dev/null +++ b/bench/cacheout.py @@ -0,0 +1,13 @@ +# Program to clean out the filesystem cache +import numpy + +a = numpy.arange(1000 * 100 * 125, dtype='f8') # 100 MB of RAM +b = a * 3 # Another 100 MB +# delete the reference to the booked memory +del a +del b + +# Do a loop to fully recharge the python interpreter +j = 2 +for i in range(1000 * 1000): + j += i * 2 diff --git a/bench/chunkshape-bench.py b/bench/chunkshape-bench.py new file mode 100644 index 0000000..bc4df36 --- /dev/null +++ b/bench/chunkshape-bench.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# Benchmark the effect of chunkshapes in reading large datasets. +# You need at least PyTables 2.1 to run this! +# F. 
Alted + +from __future__ import print_function +import numpy +import tables +from time import time + +dim1, dim2 = 360, 6109666 +rows_to_read = range(0, 360, 36) + +print("=" * 32) +# Create the EArray +f = tables.open_file("/tmp/test.h5", "w") +a = f.create_earray(f.root, "a", tables.Float64Atom(), shape=(dim1, 0), + expectedrows=dim2) +print("Chunkshape for original array:", a.chunkshape) + +# Fill the EArray +t1 = time() +zeros = numpy.zeros((dim1, 1), dtype="float64") +for i in range(dim2): + a.append(zeros) +tcre = round(time() - t1, 3) +thcre = round(dim1 * dim2 * 8 / (tcre * 1024 * 1024), 1) +print("Time to append %d rows: %s sec (%s MB/s)" % (a.nrows, tcre, thcre)) + +# Read some row vectors from the original array +t1 = time() +for i in rows_to_read: + r1 = a[i, :] +tr1 = round(time() - t1, 3) +thr1 = round(dim2 * len(rows_to_read) * 8 / (tr1 * 1024 * 1024), 1) +print("Time to read ten rows in original array: %s sec (%s MB/s)" % (tr1, + thr1)) + +print("=" * 32) +# Copy the array to another with a row-wise chunkshape +t1 = time() +#newchunkshape = (1, a.chunkshape[0]*a.chunkshape[1]) +newchunkshape = (1, a.chunkshape[0] * a.chunkshape[1] * 10) # ten times larger +b = a.copy(f.root, "b", chunkshape=newchunkshape) +tcpy = round(time() - t1, 3) +thcpy = round(dim1 * dim2 * 8 / (tcpy * 1024 * 1024), 1) +print("Chunkshape for row-wise chunkshape array:", b.chunkshape) +print("Time to copy the original array: %s sec (%s MB/s)" % (tcpy, thcpy)) + +# Read the same ten rows from the new copied array +t1 = time() +for i in rows_to_read: + r2 = b[i, :] +tr2 = round(time() - t1, 3) +thr2 = round(dim2 * len(rows_to_read) * 8 / (tr2 * 1024 * 1024), 1) +print("Time to read with a row-wise chunkshape: %s sec (%s MB/s)" % (tr2, + thr2)) +print("=" * 32) +print("Speed-up with a row-wise chunkshape:", round(tr1 / tr2, 1)) + +f.close() diff --git a/bench/chunkshape-testing.py b/bench/chunkshape-testing.py new file mode 100644 index 0000000..1b1082a --- /dev/null +++ b/bench/chunkshape-testing.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +"""Simple benchmark for testing chunkshapes and nrowsinbuf.""" + +from __future__ import print_function +import numpy +import tables +from time import time + +L = 20 +N = 2000 +M = 30 +complevel = 1 + +recarray = numpy.empty(shape=2, dtype='(2,2,2)i4,(2,3,3)f8,i4,i8') + +f = tables.open_file("chunkshape.h5", mode="w") + +# t = f.create_table(f.root, 'table', recarray, "mdim recarray") + +# a0 = f.create_array(f.root, 'field0', recarray['f0'], "mdim int32 array") +# a1 = f.create_array(f.root, 'field1', recarray['f1'], "mdim float64 array") + +# c0 = f.create_carray(f.root, 'cfield0', +# tables.Int32Atom(), (2,2,2), +# "mdim int32 carray") +# c1 = f.create_carray(f.root, 'cfield1', +# tables.Float64Atom(), (2,3,3), +# "mdim float64 carray") + +f1 = tables.open_file("chunkshape1.h5", mode="w") +c1 = f.create_carray(f1.root, 'cfield1', + tables.Int32Atom(), (L, N, M), + "scalar int32 carray", tables.Filters(complevel=0)) + +t1 = time() +c1[:] = numpy.empty(shape=(L, 1, 1), dtype="int32") +print("carray1 populate time:", time() - t1) +f1.close() + + +f2 = tables.open_file("chunkshape2.h5", mode="w") +c2 = f.create_carray(f2.root, 'cfield2', + tables.Int32Atom(), (L, M, N), + "scalar int32 carray", tables.Filters(complevel)) + +t1 = time() +c2[:] = numpy.empty(shape=(L, 1, 1), dtype="int32") +print("carray2 populate time:", time() - t1) +f2.close() + +f0 = tables.open_file("chunkshape0.h5", mode="w") +e0 = f.create_earray(f0.root, 'efield0', + tables.Int32Atom(), (0, L, 
M), + "scalar int32 carray", tables.Filters(complevel), + expectedrows=N) + +t1 = time() +e0.append(numpy.empty(shape=(N, L, M), dtype="int32")) +print("earray0 populate time:", time() - t1) +f0.close() + +f1 = tables.open_file("chunkshape1.h5", mode="w") +e1 = f.create_earray(f1.root, 'efield1', + tables.Int32Atom(), (L, 0, M), + "scalar int32 carray", tables.Filters(complevel), + expectedrows=N) + +t1 = time() +e1.append(numpy.empty(shape=(L, N, M), dtype="int32")) +print("earray1 populate time:", time() - t1) +f1.close() + + +f2 = tables.open_file("chunkshape2.h5", mode="w") +e2 = f.create_earray(f2.root, 'efield2', + tables.Int32Atom(), (L, M, 0), + "scalar int32 carray", tables.Filters(complevel), + expectedrows=N) + +t1 = time() +e2.append(numpy.empty(shape=(L, M, N), dtype="int32")) +print("earray2 populate time:", time() - t1) +f2.close() + +# t1=time() +# c2[:] = numpy.empty(shape=(M, N), dtype="int32") +# print "carray populate time:", time()-t1 + +# f3 = f.create_carray(f.root, 'cfield3', +# tables.Float64Atom(), (3,), +# "scalar float64 carray", chunkshape=(32,)) + +# e2 = f.create_earray(f.root, 'efield2', +# tables.Int32Atom(), (0, M), +# "scalar int32 carray", expectedrows=N) +# t1=time() +# e2.append(numpy.empty(shape=(N, M), dtype="int32")) +# print "earray populate time:", time()-t1 + +# t1=time() +# c2._f_copy(newname='cfield2bis') +# print "carray copy time:", time()-t1 +# t1=time() +# e2._f_copy(newname='efield2bis') +# print "earray copy time:", time()-t1 + +f.close() diff --git a/bench/collations.py b/bench/collations.py new file mode 100644 index 0000000..e9f69b7 --- /dev/null +++ b/bench/collations.py @@ -0,0 +1,124 @@ +from __future__ import print_function +import numpy as np +import tables +from time import time + +N = 1000 * 1000 +NCOLL = 200 # 200 collections maximum + +# In order to have reproducible results +np.random.seed(19) + + +class Energies(tables.IsDescription): + collection = tables.UInt8Col() + energy = tables.Float64Col() + + +def fill_bucket(lbucket): + #c = np.random.normal(NCOLL/2, NCOLL/10, lbucket) + c = np.random.normal(NCOLL / 2, NCOLL / 100, lbucket) + e = np.arange(lbucket, dtype='f8') + return c, e + +# Fill the table +t1 = time() +f = tables.open_file("data.nobackup/collations.h5", "w") +table = f.create_table("/", "Energies", Energies, expectedrows=N) +# Fill the table with values +lbucket = 1000 # Fill in buckets of 1000 rows, for speed +for i in range(0, N, lbucket): + bucket = fill_bucket(lbucket) + table.append(bucket) +# Fill the remaining rows +bucket = fill_bucket(N % lbucket) +table.append(bucket) +f.close() +print("Time to create the table with %d entries: %.3f" % (N, time() - t1)) + +# Now, read the table and group it by collection +f = tables.open_file("data.nobackup/collations.h5", "a") +table = f.root.Energies + +######################################################### +# First solution: load the table completely in memory +######################################################### +t1 = time() +t = table[:] # convert to structured array +coll1 = [] +collections = np.unique(t['collection']) +for c in collections: + cond = t['collection'] == c + energy_this_collection = t['energy'][cond] + sener = energy_this_collection.sum() + coll1.append(sener) + print(c, ' : ', sener) +del collections, energy_this_collection +print("Time for first solution: %.3f" % (time() - t1)) + +######################################################### +# Second solution: load all the collections in memory 
+######################################################### +t1 = time() +collections = {} +for row in table: + c = row['collection'] + e = row['energy'] + if c in collections: + collections[c].append(e) + else: + collections[c] = [e] +# Convert the lists in numpy arrays +coll2 = [] +for c in sorted(collections): + energy_this_collection = np.array(collections[c]) + sener = energy_this_collection.sum() + coll2.append(sener) + print(c, ' : ', sener) +del collections, energy_this_collection +print("Time for second solution: %.3f" % (time() - t1)) + +t1 = time() +table.cols.collection.create_csindex() +# table.cols.collection.reindex() +print("Time for indexing: %.3f" % (time() - t1)) + +######################################################### +# Third solution: load each collection separately +######################################################### +t1 = time() +coll3 = [] +for c in np.unique(table.col('collection')): + energy_this_collection = table.read_where( + 'collection == c', field='energy') + sener = energy_this_collection.sum() + coll3.append(sener) + print(c, ' : ', sener) +del energy_this_collection +print("Time for third solution: %.3f" % (time() - t1)) + + +t1 = time() +table2 = table.copy('/', 'EnergySortedByCollation', overwrite=True, + sortby="collection", propindexes=True) +print("Time for sorting: %.3f" % (time() - t1)) + +##################################################################### +# Fourth solution: load each collection separately. Sorted table. +##################################################################### +t1 = time() +coll4 = [] +for c in np.unique(table2.col('collection')): + energy_this_collection = table2.read_where( + 'collection == c', field='energy') + sener = energy_this_collection.sum() + coll4.append(sener) + print(c, ' : ', sener) + del energy_this_collection +print("Time for fourth solution: %.3f" % (time() - t1)) + + +# Finally, check that all solutions do match +assert coll1 == coll2 == coll3 == coll4 + +f.close() diff --git a/bench/copy-bench.py b/bench/copy-bench.py new file mode 100644 index 0000000..6346a82 --- /dev/null +++ b/bench/copy-bench.py @@ -0,0 +1,33 @@ +from __future__ import print_function +import tables +import sys +import time + +if len(sys.argv) != 3: + print("usage: %s source_file dest_file", sys.argv[0]) +filesrc = sys.argv[1] +filedest = sys.argv[2] +filehsrc = tables.open_file(filesrc) +filehdest = tables.open_file(filedest, 'w') +ntables = 0 +tsize = 0 +t1 = time.time() +for group in filehsrc.walk_groups(): + if isinstance(group._v_parent, tables.File): + groupdest = filehdest.root + else: + pathname = group._v_parent._v_pathname + groupdest = filehdest.create_group(pathname, group._v_name, + title=group._v_title) + for table in filehsrc.list_nodes(group, classname='Table'): + print("copying table -->", table) + table.copy(groupdest, table.name) + ntables += 1 + tsize += table.nrows * table.rowsize +tsizeMB = tsize / (1024 * 1024) +ttime = round(time.time() - t1, 3) +speed = round(tsizeMB / ttime, 2) +print("Copied %s tables for a total of %s MB in %s seconds (%s MB/s)" % + (ntables, tsizeMB, ttime, speed)) +filehsrc.close() +filehdest.close() diff --git a/bench/create-large-number-objects.py b/bench/create-large-number-objects.py new file mode 100644 index 0000000..f70406b --- /dev/null +++ b/bench/create-large-number-objects.py @@ -0,0 +1,42 @@ +"This creates an HDF5 file with a potentially large number of objects" + +import sys +import numpy +import tables + +filename = sys.argv[1] + +# Open a new empty HDF5 
+fileh = tables.open_file(filename, mode="w")
+
+# nlevels -- Number of levels in hierarchy
+# ngroups -- Number of groups on each level
+# ndatasets -- Number of arrays on each group
+# LR: Low ratio groups/datasets
+#nlevels, ngroups, ndatasets = (3, 1, 1000)
+# MR: Medium ratio groups/datasets
+nlevels, ngroups, ndatasets = (3, 10, 100)
+#nlevels, ngroups, ndatasets = (3, 5, 10)
+# HR: High ratio groups/datasets
+#nlevels, ngroups, ndatasets = (30, 10, 10)
+
+# Create an Array to save on disk
+a = numpy.array([-1, 2, 4], numpy.int16)
+
+group = fileh.root
+group2 = fileh.root
+for k in range(nlevels):
+    for j in range(ngroups):
+        for i in range(ndatasets):
+            # Save the array on the HDF5 file
+            fileh.create_array(group2, 'array' + str(i),
+                               a, "Signed short array")
+        # Create a new group
+        group2 = fileh.create_group(group, 'group' + str(j))
+    # Create a new group
+    group3 = fileh.create_group(group, 'ngroup' + str(k))
+    # Iterate over this new group (group3)
+    group = group3
+    group2 = group3
+
+fileh.close()
diff --git a/bench/deep-tree-h5py.py b/bench/deep-tree-h5py.py
new file mode 100644
index 0000000..2f356e8
--- /dev/null
+++ b/bench/deep-tree-h5py.py
@@ -0,0 +1,119 @@
+from __future__ import print_function
+import os
+import subprocess
+from time import time
+import random
+import numpy
+import h5py
+
+random.seed(2)
+
+
+def show_stats(explain, tref):
+    "Show the used memory (only works for Linux 2.6.x)."
+    # Build the command to obtain memory info
+    cmd = "cat /proc/%s/status" % os.getpid()
+    sout = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout
+    for line in sout:
+        if line.startswith("VmSize:"):
+            vmsize = int(line.split()[1])
+        elif line.startswith("VmRSS:"):
+            vmrss = int(line.split()[1])
+        elif line.startswith("VmData:"):
+            vmdata = int(line.split()[1])
+        elif line.startswith("VmStk:"):
+            vmstk = int(line.split()[1])
+        elif line.startswith("VmExe:"):
+            vmexe = int(line.split()[1])
+        elif line.startswith("VmLib:"):
+            vmlib = int(line.split()[1])
+    sout.close()
+    print("Memory usage: ******* %s *******" % explain)
+    print("VmSize: %7s kB\tVmRSS: %7s kB" % (vmsize, vmrss))
+    print("VmData: %7s kB\tVmStk: %7s kB" % (vmdata, vmstk))
+    print("VmExe: %7s kB\tVmLib: %7s kB" % (vmexe, vmlib))
+    tnow = time()
+    print("WallClock time:", round(tnow - tref, 3))
+    return tnow
+
+
+def populate(f, nlevels):
+    g = f
+    arr = numpy.zeros((10,), "f4")
+    for i in range(nlevels):
+        g["DS1"] = arr
+        g["DS2"] = arr
+        g.create_group('group2_')
+        g = g.create_group('group')
+
+
+def getnode(f, nlevels, niter, range_):
+    for i in range(niter):
+        nlevel = random.randrange(
+            (nlevels - range_) // 2, (nlevels + range_) // 2)
+        groupname = ""
+        for i in range(nlevel):
+            groupname += "/group"
+        groupname += "/DS1"
+        f[groupname]
+
+
+if __name__ == '__main__':
+    nlevels = 1024
+    niter = 1000
+    range_ = 256
+    profile = True
+    doprofile = True
+    verbose = False
+
+    if doprofile:
+        import pstats
+        import cProfile as prof
+
+    if profile:
+        tref = time()
+    if profile:
+        show_stats("Before creating...", tref)
+    f = h5py.File("/tmp/deep-tree.h5", 'w')
+    if doprofile:
+        prof.run('populate(f, nlevels)', 'populate.prof')
+        stats = pstats.Stats('populate.prof')
+        stats.strip_dirs()
+        stats.sort_stats('time', 'calls')
+        if verbose:
+            stats.print_stats()
+        else:
+            stats.print_stats(20)
+    else:
+        populate(f, nlevels)
+    f.close()
+    if profile:
+        show_stats("After creating", tref)
+
+# if profile: tref = time()
+# if profile: show_stats("Before opening...", tref)
+# f = h5py.File("/tmp/deep-tree.h5", 'r')
h5py.File("/tmp/deep-tree.h5", 'r') +# if profile: show_stats("Abans d'accedir...", tref) +# if doprofile: +# prof.run('getnode(f, nlevels, niter, range_)', 'deep-tree.prof') +# stats = pstats.Stats('deep-tree.prof') +# stats.strip_dirs() +# stats.sort_stats('time', 'calls') +# if verbose: +# stats.print_stats() +# else: +# stats.print_stats(20) +# else: +# getnode(f, nlevels, niter, range_) +# if profile: show_stats("Despres d'accedir", tref) +# f.close() +# if profile: show_stats("Despres de tancar", tref) + +# f = h5py.File("/tmp/deep-tree.h5", 'r') +# g = f +# for i in range(nlevels): +# dset = g["DS1"] +# dset = g["DS2"] +# group2 = g['group2_'] +# g = g['group'] +# f.close() diff --git a/bench/deep-tree.py b/bench/deep-tree.py new file mode 100644 index 0000000..1e897b1 --- /dev/null +++ b/bench/deep-tree.py @@ -0,0 +1,129 @@ +# Small benchmark for compare creation times with parameter +# PYTABLES_SYS_ATTRS active or not. + +from __future__ import print_function +import os +import subprocess +from time import time +import random +#import numpy +import tables + +random.seed(2) + + +def show_stats(explain, tref): + "Show the used memory (only works for Linux 2.6.x)." + # Build the command to obtain memory info + cmd = "cat /proc/%s/status" % os.getpid() + sout = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout + for line in sout: + if line.startswith("VmSize:"): + vmsize = int(line.split()[1]) + elif line.startswith("VmRSS:"): + vmrss = int(line.split()[1]) + elif line.startswith("VmData:"): + vmdata = int(line.split()[1]) + elif line.startswith("VmStk:"): + vmstk = int(line.split()[1]) + elif line.startswith("VmExe:"): + vmexe = int(line.split()[1]) + elif line.startswith("VmLib:"): + vmlib = int(line.split()[1]) + sout.close() + print("Memory usage: ******* %s *******" % explain) + print("VmSize: %7s kB\tVmRSS: %7s kB" % (vmsize, vmrss)) + print("VmData: %7s kB\tVmStk: %7s kB" % (vmdata, vmstk)) + print("VmExe: %7s kB\tVmLib: %7s kB" % (vmexe, vmlib)) + tnow = time() + print("WallClock time:", round(tnow - tref, 3)) + return tnow + + +def populate(f, nlevels): + g = f.root + #arr = numpy.zeros((10,), "f4") + #descr = {'f0': tables.Int32Col(), 'f1': tables.Float32Col()} + for i in range(nlevels): + #dset = f.create_array(g, "DS1", arr) + #dset = f.create_array(g, "DS2", arr) + f.create_carray(g, "DS1", tables.IntAtom(), (10,)) + f.create_carray(g, "DS2", tables.IntAtom(), (10,)) + #dset = f.create_table(g, "DS1", descr) + #dset = f.create_table(g, "DS2", descr) + f.create_group(g, 'group2_') + g = f.create_group(g, 'group') + + +def getnode(f, nlevels, niter, range_): + for i in range(niter): + nlevel = random.randrange( + (nlevels - range_) / 2, (nlevels + range_) / 2) + groupname = "" + for i in range(nlevel): + groupname += "/group" + groupname += "/DS1" + f.get_node(groupname) + + +if __name__ == '__main__': + nlevels = 1024 + niter = 256 + range_ = 128 + nodeCacheSlots = 64 + pytables_sys_attrs = True + profile = True + doprofile = True + verbose = False + + if doprofile: + import pstats + import cProfile as prof + + if profile: + tref = time() + if profile: + show_stats("Abans de crear...", tref) + f = tables.open_file("/tmp/PTdeep-tree.h5", 'w', + node_cache_slots=nodeCacheSlots, + pytables_sys_attrs=pytables_sys_attrs) + if doprofile: + prof.run('populate(f, nlevels)', 'populate.prof') + stats = pstats.Stats('populate.prof') + stats.strip_dirs() + stats.sort_stats('time', 'calls') + if verbose: + stats.print_stats() + else: + stats.print_stats(20) + else: + 
+        populate(f, nlevels)
+    f.close()
+    if profile:
+        show_stats("After creating", tref)
+
+    if profile:
+        tref = time()
+    if profile:
+        show_stats("Before opening...", tref)
+    f = tables.open_file("/tmp/PTdeep-tree.h5", 'r',
+                         node_cache_slots=nodeCacheSlots,
+                         pytables_sys_attrs=pytables_sys_attrs)
+    if profile:
+        show_stats("Before accessing...", tref)
+    if doprofile:
+        prof.run('getnode(f, nlevels, niter, range_)', 'getnode.prof')
+        stats = pstats.Stats('getnode.prof')
+        stats.strip_dirs()
+        stats.sort_stats('time', 'calls')
+        if verbose:
+            stats.print_stats()
+        else:
+            stats.print_stats(20)
+    else:
+        getnode(f, nlevels, niter, range_)
+    if profile:
+        show_stats("After accessing", tref)
+    f.close()
+    if profile:
+        show_stats("After closing", tref)
diff --git a/bench/evaluate.py b/bench/evaluate.py
new file mode 100644
index 0000000..9980c17
--- /dev/null
+++ b/bench/evaluate.py
@@ -0,0 +1,174 @@
+from __future__ import print_function
+import sys
+from time import time
+
+import numpy as np
+import tables as tb
+from numexpr.necompiler import (
+    getContext, getExprNames, getType, NumExpr)
+
+
+shape = (1000, 160000)
+#shape = (10,1600)
+filters = tb.Filters(complevel=1, complib="blosc", shuffle=0)
+ofilters = tb.Filters(complevel=1, complib="blosc", shuffle=0)
+#filters = tb.Filters(complevel=1, complib="lzo", shuffle=0)
+#ofilters = tb.Filters(complevel=1, complib="lzo", shuffle=0)
+
+# TODO: Does it make sense to add a 's'tring typecode here?
+typecode_to_dtype = {'b': 'bool', 'i': 'int32', 'l': 'int64', 'f': 'float32',
+                     'd': 'float64', 'c': 'complex128'}
+
+
+def _compute(result, function, arguments,
+             start=None, stop=None, step=None):
+    """Compute the `function` over the `arguments` and put the outcome in
+    `result`"""
+    arg0 = arguments[0]
+    if hasattr(arg0, 'maindim'):
+        maindim = arg0.maindim
+        (start, stop, step) = arg0._process_range_read(start, stop, step)
+        nrowsinbuf = arg0.nrowsinbuf
+        print("nrowsinbuf-->", nrowsinbuf)
+    else:
+        maindim = 0
+        (start, stop, step) = (0, len(arg0), 1)
+        nrowsinbuf = len(arg0)
+    shape = list(arg0.shape)
+    shape[maindim] = len(range(start, stop, step))
+
+    # The slices parameter for arg0.__getitem__
+    slices = [slice(0, dim, 1) for dim in arg0.shape]
+
+    # This is a hack to prevent doing unnecessary conversions
+    # when copying buffers
+    if hasattr(arg0, 'maindim'):
+        for arg in arguments:
+            arg._v_convert = False
+
+    # Start the computation itself
+    for start2 in range(start, stop, step * nrowsinbuf):
+        # Save the records on disk
+        stop2 = start2 + step * nrowsinbuf
+        if stop2 > stop:
+            stop2 = stop
+        # Set the proper slice in the main dimension
+        slices[maindim] = slice(start2, stop2, step)
+        start3 = (start2 - start) // step
+        stop3 = start3 + nrowsinbuf
+        if stop3 > shape[maindim]:
+            stop3 = shape[maindim]
+        # Compute the slice to be filled in destination
+        sl = []
+        for i in range(maindim):
+            sl.append(slice(None, None, None))
+        sl.append(slice(start3, stop3, None))
+        # Get the values for computing the buffer
+        values = [arg.__getitem__(tuple(slices)) for arg in arguments]
+        result[tuple(sl)] = function(*values)
+
+    # Activate the conversion again (default)
+    if hasattr(arg0, 'maindim'):
+        for arg in arguments:
+            arg._v_convert = True
+
+    return result
+
+
+def evaluate(ex, out=None, local_dict=None, global_dict=None, **kwargs):
+    """Evaluate expression and return an array."""
+
+    # First, get the signature for the arrays in expression
+    context = getContext(kwargs)
+    names, _ = getExprNames(ex, context)
+
+    # Get the arguments based on the
names. + call_frame = sys._getframe(1) + if local_dict is None: + local_dict = call_frame.f_locals + if global_dict is None: + global_dict = call_frame.f_globals + arguments = [] + types = [] + for name in names: + try: + a = local_dict[name] + except KeyError: + a = global_dict[name] + arguments.append(a) + if hasattr(a, 'atom'): + types.append(a.atom) + else: + types.append(a) + + # Create a signature + signature = [(name, getType(type_)) for (name, type_) in zip(names, types)] + print("signature-->", signature) + + # Compile the expression + compiled_ex = NumExpr(ex, signature, [], **kwargs) + print("fullsig-->", compiled_ex.fullsig) + + _compute(out, compiled_ex, arguments) + + return + + +if __name__ == "__main__": + iarrays = 0 + oarrays = 0 + doprofile = 1 + dokprofile = 0 + + f = tb.open_file("/scratch2/faltet/evaluate.h5", "w") + + # Create some arrays + if iarrays: + a = np.ones(shape, dtype='float32') + b = np.ones(shape, dtype='float32') * 2 + c = np.ones(shape, dtype='float32') * 3 + else: + a = f.create_carray(f.root, 'a', tb.Float32Atom(dflt=1.), + shape=shape, filters=filters) + a[:] = 1. + b = f.create_carray(f.root, 'b', tb.Float32Atom(dflt=2.), + shape=shape, filters=filters) + b[:] = 2. + c = f.create_carray(f.root, 'c', tb.Float32Atom(dflt=3.), + shape=shape, filters=filters) + c[:] = 3. + if oarrays: + out = np.empty(shape, dtype='float32') + else: + out = f.create_carray(f.root, 'out', tb.Float32Atom(), + shape=shape, filters=ofilters) + + t0 = time() + if iarrays and oarrays: + #out = ne.evaluate("a*b+c") + out = a * b + c + elif doprofile: + import cProfile as prof + import pstats + prof.run('evaluate("a*b+c", out)', 'evaluate.prof') + stats = pstats.Stats('evaluate.prof') + stats.strip_dirs() + stats.sort_stats('time', 'calls') + stats.print_stats(20) + elif dokprofile: + from cProfile import Profile + import lsprofcalltree + prof = Profile() + prof.run('evaluate("a*b+c", out)') + kcg = lsprofcalltree.KCacheGrind(prof) + ofile = open('evaluate.kcg', 'w') + kcg.output(ofile) + ofile.close() + else: + evaluate("a*b+c", out) + print("Time for evaluate-->", round(time() - t0, 3)) + + # print "out-->", `out` + # print `out[:]` + + f.close() diff --git a/bench/expression.py b/bench/expression.py new file mode 100644 index 0000000..55beade --- /dev/null +++ b/bench/expression.py @@ -0,0 +1,179 @@ +from __future__ import print_function +from time import time +import os.path + +import numpy as np +import tables as tb + +OUT_DIR = "/scratch2/faltet/" # the directory for data output + +shape = (1000, 1000 * 1000) # shape for input arrays +expr = "a*b+1" # Expression to be computed + +nrows, ncols = shape + + +def tables(docompute, dowrite, complib, verbose): + + # Filenames + ifilename = os.path.join(OUT_DIR, "expression-inputs.h5") + ofilename = os.path.join(OUT_DIR, "expression-outputs.h5") + + # Filters + shuffle = True + if complib == 'blosc': + filters = tb.Filters(complevel=1, complib='blosc', shuffle=shuffle) + elif complib == 'lzo': + filters = tb.Filters(complevel=1, complib='lzo', shuffle=shuffle) + elif complib == 'zlib': + filters = tb.Filters(complevel=1, complib='zlib', shuffle=shuffle) + else: + filters = tb.Filters(complevel=0, shuffle=False) + if verbose: + print("Will use filters:", filters) + + if dowrite: + f = tb.open_file(ifilename, 'w') + + # Build input arrays + t0 = time() + root = f.root + a = f.create_carray(root, 'a', tb.Float32Atom(), + shape, filters=filters) + b = f.create_carray(root, 'b', tb.Float32Atom(), + shape, filters=filters) + if 
verbose: + print("chunkshape:", a.chunkshape) + print("chunksize:", np.prod(a.chunkshape) * a.dtype.itemsize) + #row = np.linspace(0, 1, ncols) + row = np.arange(0, ncols, dtype='float32') + for i in range(nrows): + a[i] = row * (i + 1) + b[i] = row * (i + 1) * 2 + f.close() + print("[tables.Expr] Time for creating inputs:", round(time() - t0, 3)) + + if docompute: + f = tb.open_file(ifilename, 'r') + fr = tb.open_file(ofilename, 'w') + a = f.root.a + b = f.root.b + r1 = f.create_carray(fr.root, 'r1', tb.Float32Atom(), shape, + filters=filters) + # The expression + e = tb.Expr(expr) + e.set_output(r1) + t0 = time() + e.eval() + if verbose: + print("First ten values:", r1[0, :10]) + f.close() + fr.close() + print("[tables.Expr] Time for computing & save:", + round(time() - t0, 3)) + + +def memmap(docompute, dowrite, verbose): + + afilename = os.path.join(OUT_DIR, "memmap-a.bin") + bfilename = os.path.join(OUT_DIR, "memmap-b.bin") + rfilename = os.path.join(OUT_DIR, "memmap-output.bin") + if dowrite: + t0 = time() + a = np.memmap(afilename, dtype='float32', mode='w+', shape=shape) + b = np.memmap(bfilename, dtype='float32', mode='w+', shape=shape) + + # Fill arrays a and b + #row = np.linspace(0, 1, ncols) + row = np.arange(0, ncols, dtype='float32') + for i in range(nrows): + a[i] = row * (i + 1) + b[i] = row * (i + 1) * 2 + del a, b # flush data + print("[numpy.memmap] Time for creating inputs:", + round(time() - t0, 3)) + + if docompute: + t0 = time() + # Reopen inputs in read-only mode + a = np.memmap(afilename, dtype='float32', mode='r', shape=shape) + b = np.memmap(bfilename, dtype='float32', mode='r', shape=shape) + # Create the array output + r = np.memmap(rfilename, dtype='float32', mode='w+', shape=shape) + # Do the computation row by row + for i in range(nrows): + r[i] = eval(expr, {'a': a[i], 'b': b[i]}) + if verbose: + print("First ten values:", r[0, :10]) + del a, b + del r # flush output data + print("[numpy.memmap] Time for compute & save:", round(time() - t0, 3)) + + +def do_bench(what, documpute, dowrite, complib, verbose): + if what == "tables": + tables(docompute, dowrite, complib, verbose) + if what == "memmap": + memmap(docompute, dowrite, verbose) + + +if __name__ == "__main__": + import sys + import os + import getopt + + usage = """usage: %s [-T] [-M] [-c] [-w] [-v] [-z complib] + -T use tables.Expr + -M use numpy.memmap + -c do the computation only + -w write inputs only + -v verbose mode + -z select compression library ('zlib' or 'lzo'). Default is None. 
+""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'TMcwvz:') + except: + sys.stderr.write(usage) + sys.exit(1) + + # default options + usepytables = False + usememmap = False + docompute = False + dowrite = False + verbose = False + complib = None + + # Get the options + for option in opts: + if option[0] == '-T': + usepytables = True + elif option[0] == '-M': + usememmap = True + elif option[0] == '-c': + docompute = True + elif option[0] == '-w': + dowrite = True + elif option[0] == '-v': + verbose = True + elif option[0] == '-z': + complib = option[1] + if complib not in ('blosc', 'lzo', 'zlib'): + print(("complib must be 'lzo' or 'zlib' " + "and you passed: '%s'" % complib)) + sys.exit(1) + + # If not a backend selected, abort + if not usepytables and not usememmap: + print("Please select a backend:") + print("PyTables.Expr: -T") + print("NumPy.memmap: -M") + sys.exit(1) + + # Select backend and do the benchmark + if usepytables: + what = "tables" + if usememmap: + what = "memmap" + do_bench(what, docompute, dowrite, complib, verbose) diff --git a/bench/get-figures-ranges.py b/bench/get-figures-ranges.py new file mode 100644 index 0000000..98685a6 --- /dev/null +++ b/bench/get-figures-ranges.py @@ -0,0 +1,232 @@ +from __future__ import print_function +from pylab import * + +linewidth = 2 +#markers= ['+', ',', 'o', '.', 's', 'v', 'x', '>', '<', '^'] +#markers= [ 'x', '+', 'o', 's', 'v', '^', '>', '<', ] +markers = ['s', 'o', 'v', '^', '+', 'x', '>', '<', ] +markersize = 8 + + +def get_values(filename): + f = open(filename) + sizes = [] + values = [] + isize = None + for line in f: + if line.startswith('range'): + tmp = line.split(':')[1] + tmp = tmp.strip() + tmp = tmp[1:-1] + lower, upper = int(tmp.split(',')[0]), int(tmp.split(',')[1]) + isize = upper - lower + # print "isize-->", isize + if isize is None or isize == 0: + continue + if insert and line.startswith('Insert time'): + tmp = line.split(':')[1] + #itime = float(tmp[:tmp.index(',')]) + itime = float(tmp) + sizes.append(isize) + values.append(itime) + elif line.startswith('Index time'): + tmp = line.split(':')[1] + #xtime = float(tmp[:tmp.index(',')]) + xtime = float(tmp) + txtime += xtime + if create_index and create_index in line: + sizes.append(isize) + values.append(xtime) + elif create_total and txtime > xtime: + sizes.append(isize) + values.append(txtime) + elif table_size and line.startswith('Table size'): + tsize = float(line.split(':')[1]) + sizes.append(isize) + values.append(tsize) + elif indexes_size and line.startswith('Indexes size'): + xsize = float(line.split(':')[1]) + sizes.append(isize) + values.append(xsize) + elif total_size and line.startswith('Full size'): + fsize = float(line.split(':')[1]) + sizes.append(isize) + values.append(fsize) + elif ((query or query_cold or query_warm) and + line.startswith('[NOREP]')): + tmp = line.split(':')[1] + try: + qtime = float(tmp[:tmp.index('+-')]) + except ValueError: + qtime = float(tmp) + if colname in line: + if query and '1st' in line: + sizes.append(isize) + values.append(qtime) + elif query_cold and 'cold' in line: + sizes.append(isize) + values.append(qtime) + elif query_warm and 'warm' in line: + sizes.append(isize) + values.append(qtime) + + f.close() + return sizes, values + + +def show_plot(plots, yaxis, legends, gtitle): + xlabel('Number of hits') + ylabel(yaxis) + title(gtitle) + #ylim(0, 100) + grid(True) + +# legends = [f[f.find('-'):f.index('.out')] for f in filenames] +# legends = [l.replace('-', ' ') for l in legends] + 
#legend([p[0] for p in plots], legends, loc = "upper left") + legend([p[0] for p in plots], legends, loc="best") + + #subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2) + if outfile: + savefig(outfile) + else: + show() + +if __name__ == '__main__': + + import sys + import getopt + + usage = """usage: %s [-o file] [-t title] [--insert] [--create-index] [--create-total] [--table-size] [--indexes-size] [--total-size] [--query=colname] [--query-cold=colname] [--query-warm=colname] files + -o filename for output (only .png and .jpg extensions supported) + -t title of the plot + --insert -- Insert time for table + --create-index=colname -- Index time for column + --create-total -- Total time for creation of table + indexes + --table-size -- Size of table + --indexes-size -- Size of all indexes + --total-size -- Total size of table + indexes + --query=colname -- Time for querying the specified column + --query-cold=colname -- Time for querying the specified column (cold cache) + --query-warm=colname -- Time for querying the specified column (warm cache) + \n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'o:t:', + ['insert', + 'create-index=', + 'create-total', + 'table-size', + 'indexes-size', + 'total-size', + 'query=', + 'query-cold=', + 'query-warm=', + ]) + except: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too few parameters, abort + if len(pargs) < 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + outfile = None + insert = 0 + create_index = None + create_total = 0 + table_size = 0 + indexes_size = 0 + total_size = 0 + query = 0 + query_cold = 0 + query_warm = 0 + colname = None + yaxis = "No axis name" + tit = None + gtitle = "Please set a title!" 
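+    # Hypothetical usage example (output and input file names are made up):
+    #   python get-figures-ranges.py -t "Range queries" \
+    #       --query-warm=col4 pytables-blosc5-1g.out postgres-1g.out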
+
+    # Get the options
+    for option in opts:
+        if option[0] == '-o':
+            outfile = option[1]
+        elif option[0] == '-t':
+            tit = option[1]
+        elif option[0] == '--insert':
+            insert = 1
+            yaxis = "Time (s)"
+            gtitle = "Insert time for table"
+        elif option[0] == '--create-index':
+            create_index = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Create index time for column " + create_index
+        elif option[0] == '--create-total':
+            create_total = 1
+            yaxis = "Time (s)"
+            gtitle = "Create time for table + indexes"
+        elif option[0] == '--table-size':
+            table_size = 1
+            yaxis = "Size (MB)"
+            gtitle = "Table size"
+        elif option[0] == '--indexes-size':
+            indexes_size = 1
+            yaxis = "Size (MB)"
+            gtitle = "Indexes size"
+        elif option[0] == '--total-size':
+            total_size = 1
+            yaxis = "Size (MB)"
+            gtitle = "Total size (table + indexes)"
+        elif option[0] == '--query':
+            query = 1
+            colname = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Query time for " + colname + " column (first query)"
+        elif option[0] == '--query-cold':
+            query_cold = 1
+            colname = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Query time for " + colname + " column (cold cache)"
+        elif option[0] == '--query-warm':
+            query_warm = 1
+            colname = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Query time for " + colname + " column (warm cache)"
+
+    filenames = pargs
+
+    if tit:
+        gtitle = tit
+
+    plots = []
+    legends = []
+    for filename in filenames:
+        plegend = filename[filename.find('-'):filename.index('.out')]
+        plegend = plegend.replace('-', ' ')
+        xval, yval = get_values(filename)
+        print("Values for %s --> %s, %s" % (filename, xval, yval))
+        if "PyTables" in filename or "pytables" in filename:
+            plot = loglog(xval, yval, linewidth=2)
+            #plot = semilogx(xval, yval, linewidth=2)
+            plots.append(plot)
+            setp(plot, marker=markers[0], markersize=markersize,
+                 linewidth=linewidth)
+        else:
+            plots.append(loglog(xval, yval, linewidth=3, color='m'))
+            #plots.append(semilogx(xval, yval, linewidth=3, color='m'))
+            #plots.append(semilogx(xval, yval, linewidth=5))
+        legends.append(plegend)
+    if 0:  # To insert simulated data, if desired...
+ xval = [1000, 10000, 100000, 1000000, 10000000, + 100000000, 1000000000] +# yval = [0.003, 0.005, 0.02, 0.06, 1.2, +# 40, 210] + yval = [0.0009, 0.0011, 0.0022, 0.005, 0.02, + 0.2, 5.6] + plots.append(loglog(xval, yval, linewidth=5)) + legends.append("PyTables Std") + show_plot(plots, yaxis, legends, gtitle) diff --git a/bench/get-figures.py b/bench/get-figures.py new file mode 100644 index 0000000..be0e24b --- /dev/null +++ b/bench/get-figures.py @@ -0,0 +1,292 @@ +from __future__ import print_function +from pylab import * + +linewidth = 2 +#markers= ['+', ',', 'o', '.', 's', 'v', 'x', '>', '<', '^'] +#markers= [ 'x', '+', 'o', 's', 'v', '^', '>', '<', ] +markers = ['s', 'o', 'v', '^', '+', 'x', '>', '<', ] +markersize = 8 + + +def get_values(filename): + f = open(filename) + sizes = [] + values = [] + for line in f: + if line.startswith('Processing database:'): + txtime = 0 + line = line.split(':')[1] + # Check if entry is compressed and if has to be processed + line = line[:line.rfind('.')] + params = line.split('-') + for param in params: + if param[-1] in ('k', 'm', 'g'): + size = param + isize = int(size[:-1]) * 1000 + if size[-1] == "m": + isize *= 1000 + elif size[-1] == "g": + isize *= 1000 * 1000 + elif insert and line.startswith('Insert time'): + tmp = line.split(':')[1] + itime = float(tmp) + sizes.append(isize) + values.append(itime) + elif (overlaps or entropy) and line.startswith('overlaps'): + tmp = line.split(':')[1] + e1, e2 = tmp.split() + if isize in sizes: + sizes.pop() + values.pop() + sizes.append(isize) + if overlaps: + values.append(int(e1) + 1) + else: + values.append(float(e2) + 1) + elif (create_total or create_index) and line.startswith('Index time'): + tmp = line.split(':')[1] + xtime = float(tmp) + txtime += xtime + if create_index and create_index in line: + sizes.append(isize) + values.append(xtime) + elif create_total and txtime > xtime: + sizes.append(isize) + values.append(txtime) + elif table_size and line.startswith('Table size'): + tsize = float(line.split(':')[1]) + sizes.append(isize) + values.append(tsize) + elif indexes_size and line.startswith('Indexes size'): + xsize = float(line.split(':')[1]) + sizes.append(isize) + values.append(xsize) + elif total_size and line.startswith('Full size'): + fsize = float(line.split(':')[1]) + sizes.append(isize) + values.append(fsize) + elif query and line.startswith('Query time'): + tmp = line.split(':')[1] + qtime = float(tmp) + if colname in line: + sizes.append(isize) + values.append(qtime) + elif ((query or query_cold or query_warm) and + line.startswith('[NOREP]')): + tmp = line.split(':')[1] + try: + qtime = float(tmp[:tmp.index('+-')]) + except ValueError: + qtime = float(tmp) + if colname in line: + if query and '1st' in line: + sizes.append(isize) + values.append(qtime) + elif query_cold and 'cold' in line: + sizes.append(isize) + values.append(qtime) + elif query_warm and 'warm' in line: + sizes.append(isize) + values.append(qtime) + elif query_repeated and line.startswith('[REP]'): + if colname in line and 'warm' in line: + tmp = line.split(':')[1] + qtime = float(tmp[:tmp.index('+-')]) + sizes.append(isize) + values.append(qtime) + + f.close() + return sizes, values + + +def show_plot(plots, yaxis, legends, gtitle): + xlabel('Number of rows') + ylabel(yaxis) + title(gtitle) + #xlim(10**3, 10**9) + xlim(10 ** 3, 10 ** 10) + # ylim(1.0e-5) + #ylim(-1e4, 1e5) + #ylim(-1e3, 1e4) + #ylim(-1e2, 1e3) + grid(True) + +# legends = [f[f.find('-'):f.index('.out')] for f in filenames] +# legends = 
[l.replace('-', ' ') for l in legends] + legend([p[0] for p in plots], legends, loc="upper left") + #legend([p[0] for p in plots], legends, loc = "center left") + + #subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2) + if outfile: + savefig(outfile) + else: + show() + +if __name__ == '__main__': + + import sys + import getopt + + usage = """usage: %s [-o file] [-t title] [--insert] [--create-index] [--create-total] [--overlaps] [--entropy] [--table-size] [--indexes-size] [--total-size] [--query=colname] [--query-cold=colname] [--query-warm=colname] [--query-repeated=colname] files + -o filename for output (only .png and .jpg extensions supported) + -t title of the plot + --insert -- Insert time for table + --create-index=colname -- Index time for column + --create-total -- Total time for creation of table + indexes + --overlaps -- The overlapping for the created index + --entropy -- The entropy for the created index + --table-size -- Size of table + --indexes-size -- Size of all indexes + --total-size -- Total size of table + indexes + --query=colname -- Time for querying the specified column + --query-cold=colname -- Time for querying the specified column (cold cache) + --query-warm=colname -- Time for querying the specified column (warm cache) + --query-repeated=colname -- Time for querying the specified column (rep query) + \n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'o:t:', + ['insert', + 'create-index=', + 'create-total', + 'overlaps', + 'entropy', + 'table-size', + 'indexes-size', + 'total-size', + 'query=', + 'query-cold=', + 'query-warm=', + 'query-repeated=', + ]) + except: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too few parameters, abort + if len(pargs) < 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + outfile = None + insert = 0 + create_index = None + create_total = 0 + overlaps = 0 + entropy = 0 + table_size = 0 + indexes_size = 0 + total_size = 0 + query = 0 + query_cold = 0 + query_warm = 0 + query_repeated = 0 + colname = None + yaxis = "No axis name" + tit = None + gtitle = "Please set a title!" 
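+    # Hypothetical usage example (file names are made up):
+    #   python get-figures.py -o insert.png --insert \
+    #       PyTables-zlib1-1g.out postgres-zlib1-1g.out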
+
+    # Get the options
+    for option in opts:
+        if option[0] == '-o':
+            outfile = option[1]
+        elif option[0] == '-t':
+            tit = option[1]
+        elif option[0] == '--insert':
+            insert = 1
+            yaxis = "Time (s)"
+            gtitle = "Insert time for table"
+        elif option[0] == '--create-index':
+            create_index = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Create index time for " + create_index + " column"
+        elif option[0] == '--create-total':
+            create_total = 1
+            yaxis = "Time (s)"
+            gtitle = "Create time for table + indexes"
+        elif option[0] == '--overlaps':
+            overlaps = 1
+            yaxis = "Overlapping index + 1"
+            gtitle = "Overlapping for col4 column"
+        elif option[0] == '--entropy':
+            entropy = 1
+            yaxis = "Entropy + 1"
+            gtitle = "Entropy for col4 column"
+        elif option[0] == '--table-size':
+            table_size = 1
+            yaxis = "Size (MB)"
+            gtitle = "Table size"
+        elif option[0] == '--indexes-size':
+            indexes_size = 1
+            yaxis = "Size (MB)"
+            #gtitle = "Indexes size"
+            gtitle = "Index size for col4 column"
+        elif option[0] == '--total-size':
+            total_size = 1
+            yaxis = "Size (MB)"
+            gtitle = "Total size (table + indexes)"
+        elif option[0] == '--query':
+            query = 1
+            colname = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Query time for " + colname + " column (first query)"
+        elif option[0] == '--query-cold':
+            query_cold = 1
+            colname = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Query time for " + colname + " column (cold cache)"
+        elif option[0] == '--query-warm':
+            query_warm = 1
+            colname = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Query time for " + colname + " column (warm cache)"
+        elif option[0] == '--query-repeated':
+            query_repeated = 1
+            colname = option[1]
+            yaxis = "Time (s)"
+            gtitle = "Query time for " + colname + " column (repeated query)"
+
+    gtitle = gtitle.replace('col2', 'Int32')
+    gtitle = gtitle.replace('col4', 'Float64')
+
+    filenames = pargs
+
+    if tit:
+        gtitle = tit
+
+    plots = []
+    legends = []
+    for i, filename in enumerate(filenames):
+        plegend = filename[:filename.index('.out')]
+        plegend = plegend.replace('-', ' ')
+        #plegend = plegend.replace('zlib1', '')
+        if filename.find('PyTables') != -1:
+            xval, yval = get_values(filename)
+            print("Values for %s --> %s, %s" % (filename, xval, yval))
+            if xval != []:
+                plot = loglog(xval, yval)
+                #plot = semilogx(xval, yval)
+                setp(plot, marker=markers[i], markersize=markersize,
+                     linewidth=linewidth)
+                plots.append(plot)
+                legends.append(plegend)
+        else:
+            xval, yval = get_values(filename)
+            print("Values for %s --> %s, %s" % (filename, xval, yval))
+            plots.append(loglog(xval, yval, linewidth=3, color='m'))
+            #plots.append(semilogx(xval, yval, linewidth=linewidth, color='m'))
+            legends.append(plegend)
+    if 0:  # To insert simulated data, if desired...
+ xval = [1000, 10000, 100000, 1000000, 10000000, + 100000000, 1000000000] +# yval = [0.003, 0.005, 0.02, 0.06, 1.2, +# 40, 210] + yval = [0.0009, 0.0011, 0.0022, 0.005, 0.02, + 0.2, 5.6] + plots.append(loglog(xval, yval, linewidth=linewidth)) + legends.append("PyTables Std") + show_plot(plots, yaxis, legends, gtitle) diff --git a/bench/indexed_search.py b/bench/indexed_search.py new file mode 100644 index 0000000..e7de81f --- /dev/null +++ b/bench/indexed_search.py @@ -0,0 +1,462 @@ +from __future__ import print_function +from time import time +import subprocess +import random +import numpy + +# Constants + +STEP = 1000 * 100 # the size of the buffer to fill the table, in rows +SCALE = 0.1 # standard deviation of the noise compared with actual + # values +NI_NTIMES = 1 # The number of queries for doing a mean (non-idx cols) +# COLDCACHE = 10 # The number of reads where the cache is considered 'cold' +# WARMCACHE = 50 # The number of reads until the cache is considered 'warmed' +# READ_TIMES = WARMCACHE+50 # The number of complete calls to DB.query_db() +# COLDCACHE = 50 # The number of reads where the cache is considered 'cold' +# WARMCACHE = 50 # The number of reads until the cache is considered 'warmed' +# READ_TIMES = WARMCACHE+50 # The number of complete calls to DB.query_db() +MROW = 1000 * 1000. + +# Test values +COLDCACHE = 5 # The number of reads where the cache is considered 'cold' +WARMCACHE = 5 # The number of reads until the cache is considered 'warmed' +READ_TIMES = 10 # The number of complete calls to DB.query_db() + +# global variables +rdm_cod = ['lin', 'rnd'] +prec = 6 # precision for printing floats purposes + + +def get_nrows(nrows_str): + if nrows_str.endswith("k"): + return int(float(nrows_str[:-1]) * 1000) + elif nrows_str.endswith("m"): + return int(float(nrows_str[:-1]) * 1000 * 1000) + elif nrows_str.endswith("g"): + return int(float(nrows_str[:-1]) * 1000 * 1000 * 1000) + else: + raise ValueError( + "value of nrows must end with either 'k', 'm' or 'g' suffixes.") + + +class DB(object): + + def __init__(self, nrows, rng, userandom): + global step, scale + self.step = STEP + self.scale = SCALE + self.rng = rng + self.userandom = userandom + self.filename = '-'.join([rdm_cod[userandom], nrows]) + self.nrows = get_nrows(nrows) + + def get_db_size(self): + sout = subprocess.Popen("sync;du -s %s" % self.filename, shell=True, + stdout=subprocess.PIPE).stdout + line = [l for l in sout][0] + return int(line.split()[0]) + + def print_mtime(self, t1, explain): + mtime = time() - t1 + print("%s:" % explain, round(mtime, 6)) + print("Krows/s:", round((self.nrows / 1000.) / mtime, 6)) + + def print_qtime(self, colname, ltimes): + qtime1 = ltimes[0] # First measured time + qtime2 = ltimes[-1] # Last measured time + print("Query time for %s:" % colname, round(qtime1, 6)) + print("Mrows/s:", round((self.nrows / (MROW)) / qtime1, 6)) + print("Query time for %s (cached):" % colname, round(qtime2, 6)) + print("Mrows/s (cached):", round((self.nrows / (MROW)) / qtime2, 6)) + + def norm_times(self, ltimes): + "Get the mean and stddev of ltimes, avoiding the extreme values." 
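+        # Trimming rule: drop any timing more than one standard deviation
+        # above the mean (slow outliers, e.g. cold-cache hiccups), then
+        # report the mean/stddev of what remains.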
+        lmean = ltimes.mean()
+        lstd = ltimes.std()
+        ntimes = ltimes[ltimes < lmean + lstd]
+        nmean = ntimes.mean()
+        nstd = ntimes.std()
+        return nmean, nstd
+
+    def print_qtime_idx(self, colname, ltimes, repeated, verbose):
+        if repeated:
+            r = "[REP] "
+        else:
+            r = "[NOREP] "
+        ltimes = numpy.array(ltimes)
+        ntimes = len(ltimes)
+        qtime1 = ltimes[0]  # First measured time
+        ctimes = ltimes[1:COLDCACHE]
+        cmean, cstd = self.norm_times(ctimes)
+        wtimes = ltimes[WARMCACHE:]
+        wmean, wstd = self.norm_times(wtimes)
+        if verbose:
+            print("Times for cold cache:\n", ctimes)
+            # print "Times for warm cache:\n", wtimes
+            print("Histogram for warm cache: %s\n%s" %
+                  numpy.histogram(wtimes))
+        print("%s1st query time for %s:" % (r, colname),
+              round(qtime1, prec))
+        print("%sQuery time for %s (cold cache):" % (r, colname),
+              round(cmean, prec), "+-", round(cstd, prec))
+        print("%sQuery time for %s (warm cache):" % (r, colname),
+              round(wmean, prec), "+-", round(wstd, prec))
+
+    def print_db_sizes(self, init, filled, indexed):
+        table_size = (filled - init) / 1024.
+        indexes_size = (indexed - filled) / 1024.
+        print("Table size (MB):", round(table_size, 3))
+        print("Indexes size (MB):", round(indexes_size, 3))
+        print("Full size (MB):", round(table_size + indexes_size, 3))
+
+    def fill_arrays(self, start, stop):
+        arr_f8 = numpy.arange(start, stop, dtype='float64')
+        arr_i4 = numpy.arange(start, stop, dtype='int32')
+        if self.userandom:
+            arr_f8 += numpy.random.normal(0, stop * self.scale,
+                                          size=stop - start)
+            arr_i4 = numpy.array(arr_f8, dtype='int32')
+        return arr_i4, arr_f8
+
+    def create_db(self, dtype, kind, optlevel, verbose):
+        self.con = self.open_db(remove=1)
+        self.create_table(self.con)
+        init_size = self.get_db_size()
+        t1 = time()
+        self.fill_table(self.con)
+        table_size = self.get_db_size()
+        self.print_mtime(t1, 'Insert time')
+        self.index_db(dtype, kind, optlevel, verbose)
+        indexes_size = self.get_db_size()
+        self.print_db_sizes(init_size, table_size, indexes_size)
+        self.close_db(self.con)
+
+    def index_db(self, dtype, kind, optlevel, verbose):
+        if dtype == "int":
+            idx_cols = ['col2']
+        elif dtype == "float":
+            idx_cols = ['col4']
+        else:
+            idx_cols = ['col2', 'col4']
+        for colname in idx_cols:
+            t1 = time()
+            self.index_col(self.con, colname, kind, optlevel, verbose)
+            self.print_mtime(t1, 'Index time (%s)' % colname)
+
+    def query_db(self, niter, dtype, onlyidxquery, onlynonidxquery,
+                 avoidfscache, verbose, inkernel):
+        self.con = self.open_db()
+        if dtype == "int":
+            reg_cols = ['col1']
+            idx_cols = ['col2']
+        elif dtype == "float":
+            reg_cols = ['col3']
+            idx_cols = ['col4']
+        else:
+            reg_cols = ['col1', 'col3']
+            idx_cols = ['col2', 'col4']
+        if avoidfscache:
+            rseed = int(numpy.random.randint(self.nrows))
+        else:
+            rseed = 19
+        # Query for non-indexed columns
+        numpy.random.seed(rseed)
+        base = numpy.random.randint(self.nrows)
+        if not onlyidxquery:
+            for colname in reg_cols:
+                ltimes = []
+                random.seed(rseed)
+                for i in range(NI_NTIMES):
+                    t1 = time()
+                    results = self.do_query(self.con, colname, base, inkernel)
+                    ltimes.append(time() - t1)
+                if verbose:
+                    print("Results len:", results)
+                self.print_qtime(colname, ltimes)
+        # Always reopen the file after *every* query loop.
+        # Necessary for the benchmark to run correctly.
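+        # (Reopening presumably drops PyTables' cached nodes and buffers, so
+        # each query loop starts from a comparable state.)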
+        self.close_db(self.con)
+        self.con = self.open_db()
+        # Query for indexed columns
+        if not onlynonidxquery:
+            for colname in idx_cols:
+                ltimes = []
+                numpy.random.seed(rseed)
+                rndbase = numpy.random.randint(self.nrows, size=niter)
+                # First, non-repeated queries
+                for i in range(niter):
+                    base = rndbase[i]
+                    t1 = time()
+                    results = self.do_query(self.con, colname, base, inkernel)
+                    #results, tprof = self.do_query(
+                    #    self.con, colname, base, inkernel)
+                    ltimes.append(time() - t1)
+                if verbose:
+                    print("Results len:", results)
+                self.print_qtime_idx(colname, ltimes, False, verbose)
+                # Always reopen the file after *every* query loop.
+                # Necessary for the benchmark to run correctly.
+                self.close_db(self.con)
+                self.con = self.open_db()
+                ltimes = []
+# Second, repeated queries
+#                for i in range(niter):
+#                    t1=time()
+#                    results = self.do_query(
+#                        self.con, colname, base, inkernel)
+#                    results, tprof = self.do_query(self.con, colname, base, inkernel)
+#                    ltimes.append(time()-t1)
+#                if verbose:
+#                    print "Results len:", results
+#                self.print_qtime_idx(colname, ltimes, True, verbose)
+                # Print internal PyTables index tprof statistics
+                #tprof = numpy.array(tprof)
+                #tmean, tstd = self.norm_times(tprof)
+                # print "tprof-->", round(tmean, prec), "+-", round(tstd, prec)
+                # print "tprof hist-->", \
+                #     numpy.histogram(tprof)
+                # print "tprof raw-->", tprof
+                # Always reopen the file after *every* query loop.
+                # Necessary for the benchmark to run correctly.
+                self.close_db(self.con)
+                self.con = self.open_db()
+        # Finally, close the file.
+        self.close_db(self.con)
+
+    def close_db(self, con):
+        con.close()
+
+
+if __name__ == "__main__":
+    import sys
+    import getopt
+
+    try:
+        import psyco
+        psyco_imported = 1
+    except:
+        psyco_imported = 0
+
+    usage = """usage: %s [-T] [-P] [-v] [-f] [-k] [-p] [-m] [-c] [-q] [-i] [-I] [-S] [-x] [-z complevel] [-l complib] [-R range] [-N niter] [-n nrows] [-d datadir] [-O level] [-t kind] [-s] col -Q [suplim]
+            -T use PyTables
+            -P use Postgres
+            -v verbose
+            -f do a profile of the run (only query functionality & Python 2.5)
+            -k do a profile for kcachegrind use (out file is 'indexed_search.kcg')
+            -p use "psyco" if available
+            -m use random values to fill the table
+            -q do a query (both indexed and non-indexed versions)
+            -i do a query (just indexed one)
+            -I do a query (just in-kernel one)
+            -S do a query (just standard one)
+            -x choose a different seed for random numbers (i.e. avoid FS cache)
+            -c create the database
+            -z compress with zlib (no compression by default)
+            -l use complib for compression (zlib used by default)
+            -R select a range in a field in the form "start,stop" (def "0,10")
+            -N number of iterations for reading
+            -n sets the number of rows (in krows) in each table
+            -d directory to save data (default: data.nobackup)
+            -O set the optimization level for PyTables indexes
+            -t select the index type: "medium" (default) or "full", "light", "ultralight"
+            -s select a type column for operations ('int' or 'float'. default: all)
+            -Q do a repeated query up to 10**value
+            \n""" % sys.argv[0]
+
+    try:
+        opts, pargs = getopt.getopt(
+            sys.argv[1:], 'TPvfkpmcqiISxz:l:R:N:n:d:O:t:s:Q:')
+    except:
+        sys.stderr.write(usage)
+        sys.exit(1)
+
+    # default options
+    usepytables = 0
+    usepostgres = 0
+    verbose = 0
+    doprofile = 0
+    dokprofile = 0
+    usepsyco = 0
+    userandom = 0
+    docreate = 0
+    optlevel = 0
+    kind = "medium"
+    docompress = 0
+    complib = "zlib"
+    doquery = False
+    onlyidxquery = False
+    onlynonidxquery = False
+    inkernel = True
+    avoidfscache = 0
+    #rng = [-10, 10]
+    rng = [-1000, -1000]
+    repeatquery = 0
+    repeatvalue = 0
+    krows = '1k'
+    niter = READ_TIMES
+    dtype = "all"
+    datadir = "data.nobackup"
+
+    # Get the options
+    for option in opts:
+        if option[0] == '-T':
+            usepytables = 1
+        elif option[0] == '-P':
+            usepostgres = 1
+        elif option[0] == '-v':
+            verbose = 1
+        elif option[0] == '-f':
+            doprofile = 1
+        elif option[0] == '-k':
+            dokprofile = 1
+        elif option[0] == '-p':
+            usepsyco = 1
+        elif option[0] == '-m':
+            userandom = 1
+        elif option[0] == '-c':
+            docreate = 1
+        elif option[0] == '-q':
+            doquery = True
+        elif option[0] == '-i':
+            doquery = True
+            onlyidxquery = True
+        elif option[0] == '-I':
+            doquery = True
+            onlynonidxquery = True
+        elif option[0] == '-S':
+            doquery = True
+            onlynonidxquery = True
+            inkernel = False
+        elif option[0] == '-x':
+            avoidfscache = 1
+        elif option[0] == '-z':
+            docompress = int(option[1])
+        elif option[0] == '-l':
+            complib = option[1]
+        elif option[0] == '-R':
+            rng = [int(i) for i in option[1].split(",")]
+        elif option[0] == '-N':
+            niter = int(option[1])
+        elif option[0] == '-n':
+            krows = option[1]
+        elif option[0] == '-d':
+            datadir = option[1]
+        elif option[0] == '-O':
+            optlevel = int(option[1])
+        elif option[0] == '-t':
+            if option[1] in ('full', 'medium', 'light', 'ultralight'):
+                kind = option[1]
+            else:
+                print("kind should be either 'full', 'medium', 'light' or "
+                      "'ultralight'")
+                sys.exit(1)
+        elif option[0] == '-s':
+            if option[1] in ('int', 'float'):
+                dtype = option[1]
+            else:
+                print("column should be either 'int' or 'float'")
+                sys.exit(1)
+        elif option[0] == '-Q':
+            repeatquery = 1
+            repeatvalue = int(option[1])
+
+    # If no database backend is selected, abort
+    if not usepytables and not usepostgres:
+        print("Please select a backend:")
+        print("PyTables: -T")
+        print("Postgres: -P")
+        sys.exit(1)
+
+    # Create the class for the database
+    if usepytables:
+        from pytables_backend import PyTables_DB
+        db = PyTables_DB(krows, rng, userandom, datadir,
+                         docompress, complib, kind, optlevel)
+    elif usepostgres:
+        from postgres_backend import Postgres_DB
+        db = Postgres_DB(krows, rng, userandom)
+
+    if not avoidfscache:
+        # in order to always generate the same random sequence
+        numpy.random.seed(20)
+
+    if verbose:
+        if userandom:
+            print("using random values")
+        if onlyidxquery:
+            print("doing indexed queries only")
+
+    if psyco_imported and usepsyco:
+        psyco.bind(db.create_db)
+        psyco.bind(db.query_db)
+
+    if docreate:
+        if verbose:
+            print("writing %s rows" % krows)
+        db.create_db(dtype, kind, optlevel, verbose)
+
+    if doquery:
+        print("Calling query_db() %s times" % niter)
+        if doprofile:
+            import pstats
+            import cProfile as prof
+            prof.run(
+                'db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, '
+                'avoidfscache, verbose, inkernel)',
+                'indexed_search.prof')
+            stats = pstats.Stats('indexed_search.prof')
+            stats.strip_dirs()
+            stats.sort_stats('time', 'calls')
+            if verbose:
+                stats.print_stats()
+            else:
+                stats.print_stats(20)
+        elif
dokprofile: + from cProfile import Profile + import lsprofcalltree + prof = Profile() + prof.run( + 'db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, ' + 'avoidfscache, verbose, inkernel)') + kcg = lsprofcalltree.KCacheGrind(prof) + ofile = open('indexed_search.kcg', 'w') + kcg.output(ofile) + ofile.close() + elif doprofile: + import hotshot + import hotshot.stats + prof = hotshot.Profile("indexed_search.prof") + benchtime, stones = prof.run( + 'db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, ' + 'avoidfscache, verbose, inkernel)') + prof.close() + stats = hotshot.stats.load("indexed_search.prof") + stats.strip_dirs() + stats.sort_stats('time', 'calls') + stats.print_stats(20) + else: + db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, + avoidfscache, verbose, inkernel) + + if repeatquery: + # Start by a range which is almost None + db.rng = [1, 1] + if verbose: + print("range:", db.rng) + db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, + avoidfscache, verbose, inkernel) + for i in range(repeatvalue): + for j in (1, 2, 5): + rng = j * 10 ** i + db.rng = [-rng / 2, rng / 2] + if verbose: + print("range:", db.rng) +# if usepostgres: +# os.system( +# "echo 1 > /proc/sys/vm/drop_caches;" +# " /etc/init.d/postgresql restart") +# else: +# os.system("echo 1 > /proc/sys/vm/drop_caches") + db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, + avoidfscache, verbose, inkernel) diff --git a/bench/keysort.py b/bench/keysort.py new file mode 100644 index 0000000..b641641 --- /dev/null +++ b/bench/keysort.py @@ -0,0 +1,33 @@ +from __future__ import print_function +from tables.indexesextension import keysort +import numpy +from time import time + +N = 1000 * 1000 +rnd = numpy.random.randint(N, size=N) + +for dtype1 in ('S6', 'b1', + 'i1', 'i2', 'i4', 'i8', + 'u1', 'u2', 'u4', 'u8', 'f4', 'f8'): + for dtype2 in ('u4', 'i8'): + print("dtype array1, array2-->", dtype1, dtype2) + a = numpy.array(rnd, dtype1) + b = numpy.arange(N, dtype=dtype2) + c = a.copy() + + t1 = time() + d = c.argsort() + # c.sort() + # e=c + e = c[d] + f = b[d] + tref = time() - t1 + print("normal sort time-->", tref) + + t1 = time() + keysort(a, b) + tks = time() - t1 + print("keysort time-->", tks, " %.2fx" % (tref / tks,)) + assert numpy.alltrue(a == e) + #assert numpy.alltrue(b == d) + assert numpy.alltrue(f == d) diff --git a/bench/lookup_bench.py b/bench/lookup_bench.py new file mode 100644 index 0000000..49c35db --- /dev/null +++ b/bench/lookup_bench.py @@ -0,0 +1,241 @@ +"""Benchmark to help choosing the best chunksize so as to optimize the access +time in random lookups.""" + +from __future__ import print_function +from time import time +import os +import subprocess +import numpy +import tables + +# Constants +NOISE = 1e-15 # standard deviation of the noise compared with actual values + +rdm_cod = ['lin', 'rnd'] + + +def get_nrows(nrows_str): + if nrows_str.endswith("k"): + return int(float(nrows_str[:-1]) * 1000) + elif nrows_str.endswith("m"): + return int(float(nrows_str[:-1]) * 1000 * 1000) + elif nrows_str.endswith("g"): + return int(float(nrows_str[:-1]) * 1000 * 1000 * 1000) + else: + raise ValueError( + "value of nrows must end with either 'k', 'm' or 'g' suffixes.") + + +class DB(object): + + def __init__(self, nrows, dtype, chunksize, userandom, datadir, + docompress=0, complib='zlib'): + self.dtype = dtype + self.docompress = docompress + self.complib = complib + self.filename = '-'.join([rdm_cod[userandom], + "n" + nrows, "s" + chunksize, dtype]) + # Complete the filename + 
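+        # (e.g. "lookup-rnd-n1m-s32k-float-zlib1.h5"; every benchmark
+        # parameter is encoded in the name, so runs with different settings
+        # never collide on disk)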
self.filename = "lookup-" + self.filename + if docompress: + self.filename += '-' + complib + str(docompress) + self.filename = datadir + '/' + self.filename + '.h5' + print("Processing database:", self.filename) + self.userandom = userandom + self.nrows = get_nrows(nrows) + self.chunksize = get_nrows(chunksize) + self.step = self.chunksize + self.scale = NOISE + + def get_db_size(self): + sout = subprocess.Popen("sync;du -s %s" % self.filename, shell=True, + stdout=subprocess.PIPE).stdout + line = [l for l in sout][0] + return int(line.split()[0]) + + def print_mtime(self, t1, explain): + mtime = time() - t1 + print("%s:" % explain, round(mtime, 6)) + print("Krows/s:", round((self.nrows / 1000.) / mtime, 6)) + + def print_db_sizes(self, init, filled): + array_size = (filled - init) / 1024. + print("Array size (MB):", round(array_size, 3)) + + def open_db(self, remove=0): + if remove and os.path.exists(self.filename): + os.remove(self.filename) + con = tables.open_file(self.filename, 'a') + return con + + def create_db(self, verbose): + self.con = self.open_db(remove=1) + self.create_array() + init_size = self.get_db_size() + t1 = time() + self.fill_array() + array_size = self.get_db_size() + self.print_mtime(t1, 'Insert time') + self.print_db_sizes(init_size, array_size) + self.close_db() + + def create_array(self): + # The filters chosen + filters = tables.Filters(complevel=self.docompress, + complib=self.complib) + atom = tables.Atom.from_kind(self.dtype) + self.con.create_earray(self.con.root, 'earray', atom, (0,), + filters=filters, + expectedrows=self.nrows, + chunkshape=(self.chunksize,)) + + def fill_array(self): + "Fills the array" + earray = self.con.root.earray + j = 0 + arr = self.get_array(0, self.step) + for i in range(0, self.nrows, self.step): + stop = (j + 1) * self.step + if stop > self.nrows: + stop = self.nrows + ###arr = self.get_array(i, stop, dtype) + earray.append(arr) + j += 1 + earray.flush() + + def get_array(self, start, stop): + arr = numpy.arange(start, stop, dtype='float') + if self.userandom: + arr += numpy.random.normal(0, stop * self.scale, size=stop - start) + arr = arr.astype(self.dtype) + return arr + + def print_qtime(self, ltimes): + ltimes = numpy.array(ltimes) + print("Raw query times:\n", ltimes) + print("Histogram times:\n", numpy.histogram(ltimes[1:])) + ntimes = len(ltimes) + qtime1 = ltimes[0] # First measured time + if ntimes > 5: + # Wait until the 5th iteration (in order to + # ensure that the index is effectively cached) to take times + qtime2 = sum(ltimes[5:]) / (ntimes - 5) + else: + qtime2 = ltimes[-1] # Last measured time + print("1st query time:", round(qtime1, 3)) + print("Mean (skipping the first 5 meas.):", round(qtime2, 3)) + + def query_db(self, niter, avoidfscache, verbose): + self.con = self.open_db() + earray = self.con.root.earray + if avoidfscache: + rseed = int(numpy.random.randint(self.nrows)) + else: + rseed = 19 + numpy.random.seed(rseed) + numpy.random.randint(self.nrows) + ltimes = [] + for i in range(niter): + t1 = time() + self.do_query(earray, numpy.random.randint(self.nrows)) + ltimes.append(time() - t1) + self.print_qtime(ltimes) + self.close_db() + + def do_query(self, earray, idx): + return earray[idx] + + def close_db(self): + self.con.close() + + +if __name__ == "__main__": + import sys + import getopt + + usage = """usage: %s [-v] [-m] [-c] [-q] [-x] [-z complevel] [-l complib] [-N niter] [-n nrows] [-d datadir] [-t] type [-s] chunksize + -v verbose + -m use random values to fill the array + -q do a (random) 
lookup
+            -x choose a different seed for random numbers (i.e. avoid FS cache)
+            -c create the file
+            -z compress with zlib (no compression by default)
+            -l use complib for compression (zlib used by default)
+            -N number of iterations for reading
+            -n sets the number of rows in the array
+            -d directory to save data (default: data.nobackup)
+            -t select the type for array ('int' or 'float'. def 'float')
+            -s select the chunksize for array
+            \n""" % sys.argv[0]
+
+    try:
+        opts, pargs = getopt.getopt(sys.argv[1:], 'vmcqxz:l:N:n:d:t:s:')
+    except:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # default options
+    verbose = 0
+    userandom = 0
+    docreate = 0
+    optlevel = 0
+    docompress = 0
+    complib = "zlib"
+    doquery = False
+    avoidfscache = 0
+    krows = '1k'
+    chunksize = '32k'
+    niter = 50
+    datadir = "data.nobackup"
+    dtype = "float"
+
+    # Get the options
+    for option in opts:
+        if option[0] == '-v':
+            verbose = 1
+        elif option[0] == '-m':
+            userandom = 1
+        elif option[0] == '-c':
+            docreate = 1
+            createindex = 1
+        elif option[0] == '-q':
+            doquery = True
+        elif option[0] == '-x':
+            avoidfscache = 1
+        elif option[0] == '-z':
+            docompress = int(option[1])
+        elif option[0] == '-l':
+            complib = option[1]
+        elif option[0] == '-N':
+            niter = int(option[1])
+        elif option[0] == '-n':
+            krows = option[1]
+        elif option[0] == '-d':
+            datadir = option[1]
+        elif option[0] == '-t':
+            if option[1] in ('int', 'float'):
+                dtype = option[1]
+            else:
+                print("type should be either 'int' or 'float'")
+                sys.exit(0)
+        elif option[0] == '-s':
+            chunksize = option[1]
+
+    if not avoidfscache:
+        # in order to always generate the same random sequence
+        numpy.random.seed(20)
+
+    if verbose:
+        if userandom:
+            print("using random values")
+
+    db = DB(krows, dtype, chunksize, userandom, datadir, docompress, complib)
+
+    if docreate:
+        if verbose:
+            print("writing %s rows" % krows)
+        db.create_db(verbose)
+
+    if doquery:
+        print("Calling query_db() %s times" % niter)
+        db.query_db(niter, avoidfscache, verbose)
diff --git a/bench/open_close-bench.py b/bench/open_close-bench.py
new file mode 100644
index 0000000..08df89b
--- /dev/null
+++ b/bench/open_close-bench.py
@@ -0,0 +1,236 @@
+"""Testbed for open/close PyTables files.
+
+This uses the cProfile profiler.
+ +""" + +from __future__ import print_function +import os +import sys +import getopt +import pstats +import cProfile as prof +import time +import subprocess # From Python 2.4 on +import tables + +filename = None +niter = 1 + + +def show_stats(explain, tref): + "Show the used memory" + # Build the command to obtain memory info (only for Linux 2.6.x) + cmd = "cat /proc/%s/status" % os.getpid() + sout = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout + for line in sout: + if line.startswith("VmSize:"): + vmsize = int(line.split()[1]) + elif line.startswith("VmRSS:"): + vmrss = int(line.split()[1]) + elif line.startswith("VmData:"): + vmdata = int(line.split()[1]) + elif line.startswith("VmStk:"): + vmstk = int(line.split()[1]) + elif line.startswith("VmExe:"): + vmexe = int(line.split()[1]) + elif line.startswith("VmLib:"): + vmlib = int(line.split()[1]) + sout.close() + print("WallClock time:", time.time() - tref) + print("Memory usage: ******* %s *******" % explain) + print("VmSize: %7s kB\tVmRSS: %7s kB" % (vmsize, vmrss)) + print("VmData: %7s kB\tVmStk: %7s kB" % (vmdata, vmstk)) + print("VmExe: %7s kB\tVmLib: %7s kB" % (vmexe, vmlib)) + + +def check_open_close(): + for i in range(niter): + print( + "------------------ open_close #%s -------------------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + fileh.close() + show_stats("After closing file", tref) + + +def check_only_open(): + for i in range(niter): + print("------------------ only_open #%s -------------------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + show_stats("Before closing file", tref) + fileh.close() + + +def check_full_browse(): + for i in range(niter): + print("------------------ full_browse #%s -----------------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + for node in fileh: + pass + fileh.close() + show_stats("After full browse", tref) + + +def check_partial_browse(): + for i in range(niter): + print("------------------ partial_browse #%s --------------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + for node in fileh.root.ngroup0.ngroup1: + pass + fileh.close() + show_stats("After closing file", tref) + + +def check_full_browse_attrs(): + for i in range(niter): + print("------------------ full_browse_attrs #%s -----------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + for node in fileh: + # Access to an attribute + klass = node._v_attrs.CLASS + fileh.close() + show_stats("After full browse", tref) + + +def check_partial_browse_attrs(): + for i in range(niter): + print("------------------ partial_browse_attrs #%s --------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + for node in fileh.root.ngroup0.ngroup1: + # Access to an attribute + klass = node._v_attrs.CLASS + fileh.close() + show_stats("After closing file", tref) + + +def check_open_group(): + for i in range(niter): + print("------------------ open_group #%s ------------------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + group = fileh.root.ngroup0.ngroup1 + # Access to an attribute + klass = group._v_attrs.CLASS + fileh.close() + show_stats("After closing file", tref) + + +def check_open_leaf(): + for i in range(niter): + print("------------------ open_leaf #%s -----------------------" % i) + tref = time.time() + fileh = tables.open_file(filename) + leaf = fileh.root.ngroup0.ngroup1.array9 + # Access to an attribute + klass = leaf._v_attrs.CLASS + fileh.close() + 
show_stats("After closing file", tref) + + +if __name__ == '__main__': + + usage = """usage: %s [-v] [-p] [-n niter] [-O] [-o] [-B] [-b] [-g] [-l] [-A] [-a] [-E] [-S] datafile + -v verbose (total dump of profiling) + -p do profiling + -n number of iterations for reading + -O Check open_close + -o Check only_open + -B Check full browse + -b Check partial browse + -A Check full browse and reading one attr each node + -a Check partial browse and reading one attr each node + -g Check open nested group + -l Check open nested leaf + -E Check everything + -S Check everything as subprocess + \n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpn:OoBbAaglESs') + except: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + silent = 0 # if silent, does not print the final statistics + profile = 0 + all_checks = 0 + all_system_checks = 0 + func = [] + + # Checking options + options = ['-O', '-o', '-B', '-b', '-A', '-a', '-g', '-l'] + + # Dict to map options to checking functions + option2func = { + '-O': 'check_open_close', + '-o': 'check_only_open', + '-B': 'check_full_browse', + '-b': 'check_partial_browse', + '-A': 'check_full_browse_attrs', + '-a': 'check_partial_browse_attrs', + '-g': 'check_open_group', + '-l': 'check_open_leaf', + } + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + elif option[0] == '-p': + profile = 1 + elif option[0] in option2func: + func.append(option2func[option[0]]) + elif option[0] == '-E': + all_checks = 1 + for opt in options: + func.append(option2func[opt]) + elif option[0] == '-S': + all_system_checks = 1 + elif option[0] == '-s': + silent = 1 + elif option[0] == '-n': + niter = int(option[1]) + + filename = pargs[0] + + tref = time.time() + if all_system_checks: + args.remove('-S') # We don't want -S in the options list again + for opt in options: + opts = "%s \-s %s %s" % (progname, opt, " ".join(args)) + # print "opts-->", opts + os.system("python2.4 %s" % opts) + else: + if profile: + for ifunc in func: + prof.run(ifunc + '()', ifunc + '.prof') + stats = pstats.Stats(ifunc + '.prof') + stats.strip_dirs() + stats.sort_stats('time', 'calls') + if verbose: + stats.print_stats() + else: + stats.print_stats(20) + else: + for ifunc in func: + eval(ifunc + '()') + + if not silent: + print("------------------ End of run -------------------------") + show_stats("Final statistics (after closing everything)", tref) diff --git a/bench/opteron-stress-test.txt b/bench/opteron-stress-test.txt new file mode 100644 index 0000000..33dc599 --- /dev/null +++ b/bench/opteron-stress-test.txt @@ -0,0 +1,63 @@ +Stress test on a 64 bits AMD Opteron platform +============================================= +2004-02-04. F. 
Alted
+
+Platform description:
+
+4 processors AMD Opteron (64-bit) @ 1.6 GHz and 1 MB cache
+8 GB RAM
+HD IBM DeskStar 120GXP 80 GB ATA/100 2 MB cache @ 7200 rpm
+SuSe Linux Enterprise Server (SLES)
+Linux kernel 2.4.21-178-smp
+ReiserFS filesystem
+
+Here's the command to do the stress test:
+
+time python /tmp/stress-test3.py -l zlib -c 6 -g400 -t 300 -i 20000 /tmp/test-big-zlib-6.h5
+ls -lh /tmp/test-big-zlib-6.h5
+
+The output:
+
+Compression level: 6
+Compression library: zlib
+Rows written: 2400000000 Row size: 512
+Time writing rows: 56173.557 s (real) 56154.84 s (cpu) 100%
+Write rows/sec: 42724
+Write KB/s : 21362
+Rows read: 2400000000 Row size: 512 Buf size: 39936
+Time reading rows: 29339.936 s (real) 29087.88 s (cpu) 99%
+Read rows/sec: 81799
+Read KB/s : 40899
+
+real 1425m43.846s
+user 1308m34.340s
+sys 112m17.100s
+-rw-r--r-- 1 falted users 2.7G 2004-02-04 02:25 /tmp/test-big-zlib-6
+.h5
+
+The maximum amount of RAM taken by the test should be less than 300 MB (241
+MB when the test had been running for 5750 minutes, which is the last time
+I've checked it).
+
+
+Another test with the same machine:
+
+time python /tmp/stress-test3.py -l zlib -c 6 -g400 -t 300 -i 100000 /tmp/test-big-zlib-6-2.h5
+ls -lh /tmp/test-big-zlib-6-2.h5
+
+Compression level: 6
+Compression library: zlib
+Rows written: 12000000000 Row size: 512
+Time writing rows: 262930.901 s (real) 262619.72 s (cpu) 100%
+Write rows/sec: 45639
+Write KB/s : 22819
+Rows read: 12000000000 Row size: 512 Buf size: 49664
+Time reading rows: 143171.761 s (real) 141560.42 s (cpu) 99%
+Read rows/sec: 83815
+Read KB/s : 41907
+
+real 6768m34.076s
+user 6183m38.690s
+sys 552m51.150s
+-rw-r--r-- 1 5350 users 11G 2004-02-09 00:57 /tmp/test-big-zlib-6
+-2.h5
diff --git a/bench/optimal-chunksize.py b/bench/optimal-chunksize.py
new file mode 100644
index 0000000..e0660be
--- /dev/null
+++ b/bench/optimal-chunksize.py
@@ -0,0 +1,126 @@
+"""Small benchmark on the effect of chunksizes and compression on HDF5 files.
+
+Francesc Alted
+2007-11-25
+
+"""
+
+from __future__ import print_function
+import os
+import math
+import subprocess
+import tempfile
+from time import time
+import numpy
+import tables
+
+# Size of dataset
+# N, M = 512, 2**16   # 256 MB
+# N, M = 512, 2**18   # 1 GB
+# N, M = 512, 2**19   # 2 GB
+N, M = 2000, 1000000  # 15 GB
+# N, M = 4000, 1000000  # 30 GB
+datom = tables.Float64Atom()  # elements are double precision
+
+
+def quantize(data, least_significant_digit):
+    """Quantize data to improve compression.
+
+    data is quantized using around(scale*data)/scale, where scale is
+    2**bits, and bits is determined from the least_significant_digit.
+    For example, if least_significant_digit=1, bits will be 4.
+
+    """
+
+    precision = 10. ** -least_significant_digit
+    exp = math.log(precision, 10)
+    if exp < 0:
+        exp = int(math.floor(exp))
+    else:
+        exp = int(math.ceil(exp))
+    bits = math.ceil(math.log(10. ** -exp, 2))
+    scale = 2. ** bits
+    return numpy.around(scale * data) / scale
+
+
+def get_db_size(filename):
+    sout = subprocess.Popen("ls -sh %s" % filename, shell=True,
+                            stdout=subprocess.PIPE).stdout
+    line = [l for l in sout][0]
+    return line.split()[0]
+
+
+def bench(chunkshape, filters):
+    numpy.random.seed(1)  # to have reproducible results
+    filename = tempfile.mktemp(suffix='.h5')
+    print("Doing test on the file system represented by:", filename)
+
+    f = tables.open_file(filename, 'w')
+    e = f.create_earray(f.root, 'earray', datom, shape=(0, M),
+                        filters=filters,
+                        chunkshape=chunkshape)
+    # Fill the array
+    t1 = time()
+    for i in range(N):
+        # e.append([numpy.random.rand(M)])  # use this for less compressibility
+        e.append([quantize(numpy.random.rand(M), 6)])
+    # os.system("sync")
+    print("Creation time:", round(time() - t1, 3), end=' ')
+    filesize = get_db_size(filename)
+    filesize_bytes = os.stat(filename)[6]
+    print("\t\tFile size: %d -- (%s)" % (filesize_bytes, filesize))
+
+    # Read in sequential mode:
+    e = f.root.earray
+    t1 = time()
+    # Flush everything to disk and flush caches
+    #os.system("sync; echo 1 > /proc/sys/vm/drop_caches")
+    for row in e:
+        t = row
+    print("Sequential read time:", round(time() - t1, 3), end=' ')
+
+    # f.close()
+    # return
+
+    # Read in random mode:
+    i_index = numpy.random.randint(0, N, 128)
+    j_index = numpy.random.randint(0, M, 256)
+    # Flush everything to disk and flush caches
+    #os.system("sync; echo 1 > /proc/sys/vm/drop_caches")
+
+    # Protection against too large chunksizes
+    # 4 MB
+    if 0 and filters.complevel and chunkshape[0] * chunkshape[1] * 8 > 2 ** 22:
+        f.close()
+        return
+
+    t1 = time()
+    for i in i_index:
+        for j in j_index:
+            t = e[i, j]
+    print("\tRandom read time:", round(time() - t1, 3))
+
+    f.close()
+
+# Benchmark with different chunksizes and filters
+# for complevel in (0, 1, 3, 6, 9):
+for complib in (None, 'zlib', 'lzo', 'blosc'):
+    # for complib in ('blosc',):
+    if complib:
+        filters = tables.Filters(complevel=5, complib=complib)
+    else:
+        filters = tables.Filters(complevel=0)
+    print("8<--" * 20, "\nFilters:", filters, "\n" + "-" * 80)
+    # for ecs in (11, 14, 17, 20, 21, 22):
+    for ecs in range(10, 24):
+        # for ecs in (19,):
+        chunksize = 2 ** ecs
+        chunk1 = 1
+        # Integer division keeps the chunkshape integral under Python 3
+        chunk2 = chunksize // datom.itemsize
+        if chunk2 > M:
+            chunk1 = chunk2 // M
+            chunk2 = M
+        chunkshape = (chunk1, chunk2)
+        cs_str = str(chunksize // 1024) + " KB"
+        print("***** Chunksize:", cs_str, "/ Chunkshape:", chunkshape, "*****")
+        bench(chunkshape, filters)
diff --git a/bench/plot-bar.py b/bench/plot-bar.py
new file mode 100644
index 0000000..4c57fcb
--- /dev/null
+++ b/bench/plot-bar.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# a stacked bar plot with errorbars
+
+from __future__ import print_function
+from pylab import *
+
+checks = ['open_close', 'only_open',
+          'full_browse', 'partial_browse',
+          'full_browse_attrs', 'partial_browse_attrs',
+          'open_group', 'open_leaf',
+          'total']
+width = 0.15  # the width of the bars: can also be len(x) sequence
+colors = ['r', 'm', 'g', 'y', 'b']
+ind = arange(len(checks))  # the x locations for the groups
+
+
+def get_values(filename):
+    values = []
+    f = open(filename)
+    for line in f:
+        if show_memory:
+            if line.startswith('VmData:'):
+                values.append(float(line.split()[1]) / 1024.)
+        else:
+            if line.startswith('WallClock time:'):
+                values.append(float(line.split(':')[1]))
+    f.close()
+    return values
+
+
+def plot_bar(values, n):
+    global ind
+    if not gtotal:
+        # Remove the grand totals
+        values.pop()
+        if n == 0:
+            checks.pop()
+            ind = arange(len(checks))
+    p = bar(ind + width * n, values, width, color=colors[n])
+    return p
+
+
+def show_plot(bars, filenames, tit):
+    if show_memory:
+        ylabel('Memory (MB)')
+    else:
+        ylabel('Time (s)')
+    title(tit)
+    n = len(filenames)
+    xticks(ind + width * n / 2., checks, rotation=45,
+           horizontalalignment='right', fontsize=8)
+    if not gtotal:
+        #loc = 'center right'
+        loc = 'upper left'
+    else:
+        loc = 'center left'
+
+    legends = [f[:f.index('_')] for f in filenames]
+    legends = [l.replace('-', ' ') for l in legends]
+    legend([p[0] for p in bars], legends, loc=loc)
+
+    subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2)
+    if outfile:
+        savefig(outfile)
+    else:
+        show()
+
+if __name__ == '__main__':
+
+    import sys
+    import getopt
+
+    usage = """usage: %s [-g] [-m] [-o file] [-t title] files
+            -g grand total
+            -m show memory instead of time
+            -o filename for output (only .png and .jpg extensions supported)
+            -t title of the plot
+            \n""" % sys.argv[0]
+
+    try:
+        opts, pargs = getopt.getopt(sys.argv[1:], 'gmo:t:')
+    except:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    progname = sys.argv[0]
+    args = sys.argv[1:]
+
+    # if we pass too few parameters, abort
+    if len(pargs) < 1:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # default options
+    tit = "Comparison of different PyTables versions"
+    gtotal = 0
+    show_memory = 0
+    outfile = None
+
+    # Get the options
+    for option in opts:
+        if option[0] == '-g':
+            gtotal = 1
+        elif option[0] == '-m':
+            show_memory = 1
+        elif option[0] == '-o':
+            outfile = option[1]
+        elif option[0] == '-t':
+            tit = option[1]
+
+    filenames = pargs
+    bars = []
+    n = 0
+    for filename in filenames:
+        values = get_values(filename)
+        print("Values-->", values)
+        bars.append(plot_bar(values, n))
+        n += 1
+    show_plot(bars, filenames, tit)
diff --git a/bench/plot-comparison-lzo-zlib-ucl.gnuplot b/bench/plot-comparison-lzo-zlib-ucl.gnuplot
new file mode 100644
index 0000000..bda1f52
--- /dev/null
+++ b/bench/plot-comparison-lzo-zlib-ucl.gnuplot
@@ -0,0 +1,27 @@
+#set term post color
+set term post eps color
+set xlabel "Number of rows"
+set ylabel "Speed (Krow/s)"
+
+set linestyle 1 lw 7
+set linestyle 2 lw 7
+set linestyle 3 lw 7
+set linestyle 4 lw 7
+set logscale x
+
+# For small record size
+set output "read-small-lzo-zlib-ucl-comparison.eps"
+set tit "Selecting with small record size (16 bytes)"
+pl [1000:] [0:1000] "small-nc.out" u ($1):($10) t "No compression" w linesp ls 1, \
+   "small-zlib.out" u ($1):($10) t "ZLIB" w linesp ls 2, \
+   "small-lzo.out" u ($1):($10) t "LZO" w linesp ls 3, \
+   "small-ucl.out" u ($1):($10) t "UCL" w linesp ls 4
+
+# For small record size
+set output "write-small-lzo-zlib-ucl-comparison.eps"
+set tit "Writing with small record size (16 bytes)"
+pl [1000:] [0:500] "small-nc.out" u ($1):($5) tit "No compression" w linesp ls 1, \
+   "small-zlib.out" u ($1):($5) tit "ZLIB" w linesp ls 2, \
+   "small-lzo.out" u ($1):($5) tit "LZO" w linesp ls 3, \
+   "small-ucl.out" u ($1):($5) tit "UCL" w linesp ls 4
+
diff --git a/bench/plot-comparison-psyco-lzo.gnuplot b/bench/plot-comparison-psyco-lzo.gnuplot
new file mode 100644
index 0000000..fb71de9
--- /dev/null
+++ b/bench/plot-comparison-psyco-lzo.gnuplot
@@ -0,0 +1,28 @@
+#set term post color
+set term post eps color
+set xlabel "Number of rows"
+set ylabel "Speed (Krow/s)"
+
+set linestyle 1 lw 7
+set linestyle 2 lw 7
+set linestyle 3 lw 7
+set linestyle 4 lw 7
+
+# For small record size
+set output "read-small-psyco-lzo-comparison.eps"
+set tit "Selecting with small record size (16 bytes)"
+set logscale x
+pl [1000:] [0:1200] "small-psyco-lzo.out" u ($1):($10) t "Psyco & compression (LZO)" w linesp ls 2, \
+   "small-psyco-nc.out" u ($1):($10) tit "Psyco & no compression" w linesp ls 3, \
+   "small-lzo.out" u ($1):($10) t "No Psyco & compression (LZO)" w linesp ls 1, \
+   "small-nc.out" u ($1):($10) tit "No Psyco & no compression" w linesp ls 4
+
+# For small record size
+set output "write-small-psyco-lzo-comparison.eps"
+set tit "Writing with small record size (16 bytes)"
+set logscale x
+pl [1000:] [0:1000] "small-psyco-lzo.out" u ($1):($5) t "Psyco & compression (LZO)" w linesp ls 2, \
+   "small-psyco-nc.out" u ($1):($5) tit "Psyco & no compression" w linesp ls 3, \
+   "small-lzo.out" u ($1):($5) t "No Psyco & compression (LZO)" w linesp ls 1, \
+   "small-nc.out" u ($1):($5) tit "No Psyco & no compression" w linesp ls 4
+
diff --git a/bench/poly.py b/bench/poly.py
new file mode 100644
index 0000000..08752ad
--- /dev/null
+++ b/bench/poly.py
@@ -0,0 +1,196 @@
+#######################################################################
+# This script compares the speed of the computation of a polynomial
+# for different (numpy.memmap and tables.Expr) out-of-memory paradigms.
+#
+# Author: Francesc Alted
+# Date: 2010-02-24
+#######################################################################
+
+from __future__ import print_function
+import os
+from time import time
+import numpy as np
+import tables as tb
+import numexpr as ne
+
+expr = ".25*x**3 + .75*x**2 - 1.5*x - 2"  # the polynomial to compute
+N = 10 * 1000 * 1000  # the number of points to compute expression (80 MB)
+step = 100 * 1000  # perform calculation in slices of `step` elements
+dtype = np.dtype('f8')  # the datatype
+#CHUNKSHAPE = (2**17,)
+CHUNKSHAPE = None
+
+# Global variable for the x values for pure numpy & numexpr
+x = None
+
+# *** The next variables do not need to be changed ***
+
+# Filenames for numpy.memmap
+fprefix = "numpy.memmap"  # the I/O file prefix
+mpfnames = [fprefix + "-x.bin", fprefix + "-r.bin"]
+
+# Filename for tables.Expr
+h5fname = "tablesExpr.h5"  # the I/O file
+
+MB = 1024 * 1024.
# a MegaByte + + +def print_filesize(filename, clib=None, clevel=0): + """Print some statistics about file sizes.""" + + # os.system("sync") # make sure that all data has been flushed to disk + if isinstance(filename, list): + filesize_bytes = 0 + for fname in filename: + filesize_bytes += os.stat(fname)[6] + else: + filesize_bytes = os.stat(filename)[6] + filesize_MB = round(filesize_bytes / MB, 1) + print("\t\tTotal file sizes: %d -- (%s MB)" % ( + filesize_bytes, filesize_MB), end=' ') + if clevel > 0: + print("(using %s lvl%s)" % (clib, clevel)) + else: + print() + + +def populate_x_numpy(): + """Populate the values in x axis for numpy.""" + global x + # Populate x in range [-1, 1] + x = np.linspace(-1, 1, N) + + +def populate_x_memmap(): + """Populate the values in x axis for numpy.memmap.""" + # Create container for input + x = np.memmap(mpfnames[0], dtype=dtype, mode="w+", shape=(N,)) + + # Populate x in range [-1, 1] + for i in range(0, N, step): + chunk = np.linspace((2 * i - N) / float(N), + (2 * (i + step) - N) / float(N), step) + x[i:i + step] = chunk + del x # close x memmap + + +def populate_x_tables(clib, clevel): + """Populate the values in x axis for pytables.""" + f = tb.open_file(h5fname, "w") + + # Create container for input + atom = tb.Atom.from_dtype(dtype) + filters = tb.Filters(complib=clib, complevel=clevel) + x = f.create_carray(f.root, "x", atom=atom, shape=(N,), + filters=filters, + chunkshape=CHUNKSHAPE, + ) + + # Populate x in range [-1, 1] + for i in range(0, N, step): + chunk = np.linspace((2 * i - N) / float(N), + (2 * (i + step) - N) / float(N), step) + x[i:i + step] = chunk + f.close() + + +def compute_numpy(): + """Compute the polynomial with pure numpy.""" + y = eval(expr) + + +def compute_numexpr(): + """Compute the polynomial with pure numexpr.""" + y = ne.evaluate(expr) + + +def compute_memmap(): + """Compute the polynomial with numpy.memmap.""" + # Reopen inputs in read-only mode + x = np.memmap(mpfnames[0], dtype=dtype, mode='r', shape=(N,)) + # Create the array output + r = np.memmap(mpfnames[1], dtype=dtype, mode="w+", shape=(N,)) + + # Do the computation by chunks and store in output + r[:] = eval(expr) # where is stored the result? + # r = eval(expr) # result is stored in-memory + + del x, r # close x and r memmap arrays + print_filesize(mpfnames) + + +def compute_tables(clib, clevel): + """Compute the polynomial with tables.Expr.""" + f = tb.open_file(h5fname, "a") + x = f.root.x # get the x input + # Create container for output + atom = tb.Atom.from_dtype(dtype) + filters = tb.Filters(complib=clib, complevel=clevel) + r = f.create_carray(f.root, "r", atom=atom, shape=(N,), + filters=filters, + chunkshape=CHUNKSHAPE, + ) + + # Do the actual computation and store in output + ex = tb.Expr(expr) # parse the expression + ex.set_output(r) # where is stored the result? + # when commented out, the result goes in-memory + ex.eval() # evaluate! 
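+    # tb.Expr evaluates the expression chunk by chunk (using numexpr under
+    # the hood), streaming blocks from `x` and writing them into the `r`
+    # carray, so the full 80 MB operands never have to sit in memory at once.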
+
+    f.close()
+    print_filesize(h5fname, clib, clevel)
+
+
+if __name__ == '__main__':
+
+    tb.print_versions()
+
+    print("Total size for datasets:",
+          round(2 * N * dtype.itemsize / MB, 1), "MB")
+
+    # Get the compression libraries supported
+    # supported_clibs = [clib for clib in ("zlib", "lzo", "bzip2", "blosc")
+    # supported_clibs = [clib for clib in ("zlib", "lzo", "blosc")
+    supported_clibs = [clib for clib in ("blosc",)
+                       if tb.which_lib_version(clib)]
+
+    # Initialization code
+    # for what in ["numpy", "numpy.memmap", "numexpr"]:
+    for what in ["numpy", "numexpr"]:
+        # break
+        print("Populating x using %s with %d points..." % (what, N))
+        t0 = time()
+        if what == "numpy":
+            populate_x_numpy()
+            compute = compute_numpy
+        elif what == "numexpr":
+            populate_x_numpy()
+            compute = compute_numexpr
+        elif what == "numpy.memmap":
+            populate_x_memmap()
+            compute = compute_memmap
+        print("*** Time elapsed populating:", round(time() - t0, 3))
+        print("Computing: '%s' using %s" % (expr, what))
+        t0 = time()
+        compute()
+        print("**************** Time elapsed computing:",
+              round(time() - t0, 3))
+
+    for what in ["tables.Expr"]:
+        t0 = time()
+        first = True  # Sentinel
+        for clib in supported_clibs:
+            # for clevel in (0, 1, 3, 6, 9):
+            for clevel in range(10):
+                # for clevel in (1,):
+                if not first and clevel == 0:
+                    continue
+                print("Populating x using %s with %d points..." % (what, N))
+                populate_x_tables(clib, clevel)
+                print("*** Time elapsed populating:", round(time() - t0, 3))
+                print("Computing: '%s' using %s" % (expr, what))
+                t0 = time()
+                compute_tables(clib, clevel)
+                print("**************** Time elapsed computing:",
+                      round(time() - t0, 3))
+                first = False
diff --git a/bench/postgres-search-bench.py b/bench/postgres-search-bench.py
new file mode 100644
index 0000000..d2c9f4f
--- /dev/null
+++ b/bench/postgres-search-bench.py
@@ -0,0 +1,248 @@
+from __future__ import print_function
+from time import time
+import numpy
+import random
+
+DSN = "dbname=test port = 5435"
+
+# in order to always generate the same random sequence
+random.seed(19)
+
+
+def flatten(l):
+    """Flattens list of tuples l."""
+    return [x[0] for x in l]
+
+
+def fill_arrays(start, stop):
+    col_i = numpy.arange(start, stop, type=numpy.Int32)
+    if userandom:
+        col_j = numpy.random.uniform(0, nrows, size=[stop - start])
+    else:
+        col_j = numpy.array(col_i, type=numpy.Float64)
+    return col_i, col_j
+
+# Generator to ensure PyTables benchmark compatibility
+
+
+def int_generator(nrows):
+    step = 1000 * 100
+    j = 0
+    for i in range(nrows):
+        if i >= step * j:
+            stop = (j + 1) * step
+            if stop > nrows:  # Seems unnecessary
+                stop = nrows
+            col_i, col_j = fill_arrays(i, stop)
+            j += 1
+            k = 0
+        yield (col_i[k], col_j[k])
+        k += 1
+
+
+def int_generator_slow(nrows):
+    for i in range(nrows):
+        if userandom:
+            yield (i, float(random.randint(0, nrows)))
+        else:
+            yield (i, float(i))
+
+
+class Stream32(object):
+
+    "Object simulating a file for reading"
+
+    def __init__(self):
+        self.n = None
+        self.read_it = self.read_iter()
+
+    # This doesn't work! It has to be converted into a normal one!
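+    # psycopg2's cursor.copy_from() only needs file-like read() and
+    # readline() methods, hence this fake file object: rows are generated
+    # lazily instead of materializing the whole table in memory.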
+ def readline(self, n=None): + for tup in int_generator(nrows): + sout = "%s\t%s\n" % tup + if n is not None and len(sout) > n: + for i in range(0, len(sout), n): + yield sout[i:i + n] + else: + yield sout + + def read_iter(self): + sout = "" + n = self.n + for tup in int_generator(nrows): + sout += "%s\t%s\n" % tup + if n is not None and len(sout) > n: + for i in range(n, len(sout), n): + rout = sout[:n] + sout = sout[n:] + yield rout + yield sout + + def read(self, n=None): + self.n = n + try: + str = next(self.read_it) + except StopIteration: + str = "" + return str + + +def open_db(filename, remove=0): + if not filename: + con = sqlite.connect(DSN) + else: + con = sqlite.connect(filename) + cur = con.cursor() + return con, cur + + +def create_db(filename, nrows): + con, cur = open_db(filename, remove=1) + try: + cur.execute("create table ints(i integer, j double precision)") + except: + con.rollback() + cur.execute("DROP TABLE ints") + cur.execute("create table ints(i integer, j double precision)") + con.commit() + con.set_isolation_level(2) + t1 = time() + st = Stream32() + cur.copy_from(st, "ints") + # In case of postgres, the speeds of generator and loop are similar + #cur.executemany("insert into ints values (%s,%s)", int_generator(nrows)) +# for i in xrange(nrows): +# cur.execute("insert into ints values (%s,%s)", (i, float(i))) + con.commit() + ctime = time() - t1 + if verbose: + print("insert time:", round(ctime, 5)) + print("Krows/s:", round((nrows / 1000.) / ctime, 5)) + close_db(con, cur) + + +def index_db(filename): + con, cur = open_db(filename) + t1 = time() + cur.execute("create index ij on ints(j)") + con.commit() + itime = time() - t1 + if verbose: + print("index time:", round(itime, 5)) + print("Krows/s:", round(nrows / itime, 5)) + # Close the DB + close_db(con, cur) + + +def query_db(filename, rng): + con, cur = open_db(filename) + t1 = time() + ntimes = 10 + for i in range(ntimes): + # between clause does not seem to take advantage of indexes + # cur.execute("select j from ints where j between %s and %s" % \ + cur.execute("select i from ints where j >= %s and j <= %s" % + # cur.execute("select i from ints where i >= %s and i <= + # %s" % + (rng[0] + i, rng[1] + i)) + results = cur.fetchall() + con.commit() + qtime = (time() - t1) / ntimes + if verbose: + print("query time:", round(qtime, 5)) + print("Mrows/s:", round((nrows / 1000.) 
/ qtime, 5)) + results = sorted(flatten(results)) + print(results) + close_db(con, cur) + + +def close_db(con, cur): + cur.close() + con.close() + +if __name__ == "__main__": + import sys + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-v] [-p] [-m] [-i] [-q] [-c] [-R range] [-n nrows] file + -v verbose + -p use "psyco" if available + -m use random values to fill the table + -q do query + -c create the database + -i index the table + -2 use sqlite2 (default is use sqlite3) + -R select a range in a field in the form "start,stop" (def "0,10") + -n sets the number of rows (in krows) in each table + \n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpmiqc2R:n:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + usepsyco = 0 + userandom = 0 + docreate = 0 + createindex = 0 + doquery = 0 + sqlite_version = "3" + rng = [0, 10] + nrows = 1 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + elif option[0] == '-p': + usepsyco = 1 + elif option[0] == '-m': + userandom = 1 + elif option[0] == '-i': + createindex = 1 + elif option[0] == '-q': + doquery = 1 + elif option[0] == '-c': + docreate = 1 + elif option[0] == "-2": + sqlite_version = "2" + elif option[0] == '-R': + rng = [int(i) for i in option[1].split(",")] + elif option[0] == '-n': + nrows = int(option[1]) + + # Catch the hdf5 file passed as the last argument + filename = pargs[0] + +# if sqlite_version == "2": +# import sqlite +# else: +# from pysqlite2 import dbapi2 as sqlite + import psycopg2 as sqlite + + if verbose: + # print "pysqlite version:", sqlite.version + if userandom: + print("using random values") + + if docreate: + if verbose: + print("writing %s krows" % nrows) + if psyco_imported and usepsyco: + psyco.bind(create_db) + nrows *= 1000 + create_db(filename, nrows) + + if createindex: + index_db(filename) + + if doquery: + query_db(filename, rng) diff --git a/bench/postgres_backend.py b/bench/postgres_backend.py new file mode 100644 index 0000000..cae5f7b --- /dev/null +++ b/bench/postgres_backend.py @@ -0,0 +1,156 @@ +from __future__ import print_function +import subprocess # Needs Python 2.4 +from indexed_search import DB +import psycopg2 as db2 + +CLUSTER_NAME = "base" +DATA_DIR = "/scratch2/postgres/data/%s" % CLUSTER_NAME +#DATA_DIR = "/var/lib/pgsql/data/%s" % CLUSTER_NAME +DSN = "dbname=%s port=%s" +CREATE_DB = "createdb %s" +DROP_DB = "dropdb %s" +TABLE_NAME = "intsfloats" +PORT = 5432 + + +class StreamChar(object): + "Object simulating a file for reading" + + def __init__(self, db): + self.db = db + self.nrows = db.nrows + self.step = db.step + self.read_it = self.read_iter() + + def values_generator(self): + j = 0 + for i in range(self.nrows): + if i >= j * self.step: + stop = (j + 1) * self.step + if stop > self.nrows: + stop = self.nrows + arr_i4, arr_f8 = self.db.fill_arrays(i, stop) + j += 1 + k = 0 + yield (arr_i4[k], arr_i4[k], arr_f8[k], arr_f8[k]) + k += 1 + + def read_iter(self): + sout = "" + n = self.nbytes + for tup in self.values_generator(): + sout += "%s\t%s\t%s\t%s\n" % tup + if n is not None and len(sout) > n: + for i in range(n, len(sout), n): + rout = sout[:n] + sout = sout[n:] + yield rout + yield sout + + def read(self, n=None): + self.nbytes = n + try: + str = next(self.read_it) + except StopIteration: + str = "" + return str + + # required by postgres2 driver, but not used + def readline(self): + pass + + +class Postgres_DB(DB): + + def 
__init__(self, nrows, rng, userandom): + DB.__init__(self, nrows, rng, userandom) + self.port = PORT + + def flatten(self, l): + """Flattens list of tuples l.""" + return [x[0] for x in l] + # return map(lambda x: x[col], l) + + # Overloads the method in DB class + def get_db_size(self): + sout = subprocess.Popen("sudo du -s %s" % DATA_DIR, + shell=True, + stdout=subprocess.PIPE).stdout + line = [l for l in sout][0] + return int(line.split()[0]) + + def open_db(self, remove=0): + if remove: + sout = subprocess.Popen(DROP_DB % self.filename, shell=True, + stdout=subprocess.PIPE).stdout + for line in sout: + print(line) + sout = subprocess.Popen(CREATE_DB % self.filename, shell=True, + stdout=subprocess.PIPE).stdout + for line in sout: + print(line) + + print("Processing database:", self.filename) + con = db2.connect(DSN % (self.filename, self.port)) + self.cur = con.cursor() + return con + + def create_table(self, con): + self.cur.execute("""create table %s( + col1 integer, + col2 integer, + col3 double precision, + col4 double precision)""" % TABLE_NAME) + con.commit() + + def fill_table(self, con): + st = StreamChar(self) + self.cur.copy_from(st, TABLE_NAME) + con.commit() + + def index_col(self, con, colname, optlevel, idxtype, verbose): + self.cur.execute("create index %s on %s(%s)" % + (colname + '_idx', TABLE_NAME, colname)) + con.commit() + + def do_query_simple(self, con, column, base): + self.cur.execute( + "select sum(%s) from %s where %s >= %s and %s <= %s" % + (column, TABLE_NAME, + column, base + self.rng[0], + column, base + self.rng[1])) +# "select * from %s where %s >= %s and %s <= %s" % \ +# (TABLE_NAME, +# column, base+self.rng[0], +# column, base+self.rng[1])) + #results = self.flatten(self.cur.fetchall()) + results = self.cur.fetchall() + return results + + def do_query(self, con, column, base, *unused): + d = (self.rng[1] - self.rng[0]) / 2. 
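+        # Two half-width windows are derived from `base`: [inf2, sup2) bounds
+        # the queried column and (inf1, sup1) bounds col2, so the OR-ed
+        # condition below can exercise two indexed columns in a single query.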
+ inf1 = int(self.rng[0] + base) + sup1 = int(self.rng[0] + d + base) + inf2 = self.rng[0] + base * 2 + sup2 = self.rng[0] + d + base * 2 + # print "lims-->", inf1, inf2, sup1, sup2 + condition = "((%s>=%s) and (%s<%s)) or ((col2>%s) and (col2<%s))" + #condition = "((col3>=%s) and (col3<%s)) or ((col1>%s) and (col1<%s))" + condition += " and ((col1+3.1*col2+col3*col4) > 3)" + #condition += " and (sqrt(col1^2+col2^2+col3^2+col4^2) > .1)" + condition = condition % (column, inf2, column, sup2, inf1, sup1) + # print "condition-->", condition + self.cur.execute( + # "select sum(%s) from %s where %s" % + "select %s from %s where %s" % + (column, TABLE_NAME, condition)) + #results = self.flatten(self.cur.fetchall()) + results = self.cur.fetchall() + #results = self.cur.fetchall() + # print "results-->", results + # return results + return len(results) + + def close_db(self, con): + self.cur.close() + con.close() diff --git a/bench/pytables-search-bench.py b/bench/pytables-search-bench.py new file mode 100644 index 0000000..726d30b --- /dev/null +++ b/bench/pytables-search-bench.py @@ -0,0 +1,221 @@ +from __future__ import print_function +import os +from time import time +import random +import numpy as np +import tables + +# in order to always generate the same random sequence +random.seed(19) +np.random.seed((19, 20)) + + +def open_db(filename, remove=0): + if remove and os.path.exists(filename): + os.remove(filename) + con = tables.open_file(filename, 'a') + return con + + +def create_db(filename, nrows): + + class Record(tables.IsDescription): + col1 = tables.Int32Col() + col2 = tables.Int32Col() + col3 = tables.Float64Col() + col4 = tables.Float64Col() + + con = open_db(filename, remove=1) + table = con.create_table(con.root, 'table', Record, + filters=filters, expectedrows=nrows) + table.indexFilters = filters + step = 1000 * 100 + scale = 0.1 + t1 = time() + j = 0 + for i in range(0, nrows, step): + stop = (j + 1) * step + if stop > nrows: + stop = nrows + arr_f8 = np.arange(i, stop, type=np.Float64) + arr_i4 = np.arange(i, stop, type=np.Int32) + if userandom: + arr_f8 += np.random.normal(0, stop * scale, shape=[stop - i]) + arr_i4 = np.array(arr_f8, type=np.Int32) + recarr = np.rec.fromarrays([arr_i4, arr_i4, arr_f8, arr_f8]) + table.append(recarr) + j += 1 + table.flush() + ctime = time() - t1 + if verbose: + print("insert time:", round(ctime, 5)) + print("Krows/s:", round((nrows / 1000.) / ctime, 5)) + index_db(table) + close_db(con) + + +def index_db(table): + t1 = time() + table.cols.col2.create_index() + itime = time() - t1 + if verbose: + print("index time (int):", round(itime, 5)) + print("Krows/s:", round((nrows / 1000.) / itime, 5)) + t1 = time() + table.cols.col4.create_index() + itime = time() - t1 + if verbose: + print("index time (float):", round(itime, 5)) + print("Krows/s:", round((nrows / 1000.) / itime, 5)) + + +def query_db(filename, rng): + con = open_db(filename) + table = con.root.table + # Query for integer columns + # Query for non-indexed column + if not doqueryidx: + t1 = time() + ntimes = 10 + for i in range(ntimes): + results = [ + r['col1'] for r in table.where( + rng[0] + i <= table.cols.col1 <= rng[1] + i) + ] + qtime = (time() - t1) / ntimes + if verbose: + print("query time (int, not indexed):", round(qtime, 5)) + print("Mrows/s:", round((nrows / 1000.) 
/ qtime, 5)) + print(results) + # Query for indexed column + t1 = time() + ntimes = 10 + for i in range(ntimes): + results = [ + r['col1'] for r in table.where( + rng[0] + i <= table.cols.col2 <= rng[1] + i) + ] + qtime = (time() - t1) / ntimes + if verbose: + print("query time (int, indexed):", round(qtime, 5)) + print("Mrows/s:", round((nrows / 1000.) / qtime, 5)) + print(results) + # Query for floating columns + # Query for non-indexed column + if not doqueryidx: + t1 = time() + ntimes = 10 + for i in range(ntimes): + results = [ + r['col3'] for r in table.where( + rng[0] + i <= table.cols.col3 <= rng[1] + i) + ] + qtime = (time() - t1) / ntimes + if verbose: + print("query time (float, not indexed):", round(qtime, 5)) + print("Mrows/s:", round((nrows / 1000.) / qtime, 5)) + print(results) + # Query for indexed column + t1 = time() + ntimes = 10 + for i in range(ntimes): + results = [r['col3'] for r in + table.where(rng[0] + i <= table.cols.col4 <= rng[1] + i)] + qtime = (time() - t1) / ntimes + if verbose: + print("query time (float, indexed):", round(qtime, 5)) + print("Mrows/s:", round((nrows / 1000.) / qtime, 5)) + print(results) + close_db(con) + + +def close_db(con): + con.close() + +if __name__ == "__main__": + import sys + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-v] [-p] [-m] [-c] [-q] [-i] [-z complevel] [-l complib] [-R range] [-n nrows] file + -v verbose + -p use "psyco" if available + -m use random values to fill the table + -q do a query (both indexed and non-indexed version) + -i do a query (exclude non-indexed version) + -c create the database + -z compress with zlib (no compression by default) + -l use complib for compression (zlib used by default) + -R select a range in a field in the form "start,stop" (def "0,10") + -n sets the number of rows (in krows) in each table + \n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpmcqiz:l:R:n:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + usepsyco = 0 + userandom = 0 + docreate = 0 + docompress = 0 + complib = "zlib" + doquery = 0 + doqueryidx = 0 + rng = [0, 10] + nrows = 1 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + elif option[0] == '-p': + usepsyco = 1 + elif option[0] == '-m': + userandom = 1 + elif option[0] == '-c': + docreate = 1 + createindex = 1 + elif option[0] == '-q': + doquery = 1 + elif option[0] == '-i': + doqueryidx = 1 + elif option[0] == '-z': + docompress = int(option[1]) + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-R': + rng = [int(i) for i in option[1].split(",")] + elif option[0] == '-n': + nrows = int(option[1]) + + # Catch the hdf5 file passed as the last argument + filename = pargs[0] + + # The filters chosen + filters = tables.Filters(complevel=docompress, complib=complib) + + if verbose: + print("pytables version:", tables.__version__) + if userandom: + print("using random values") + if doqueryidx: + print("doing indexed queries only") + + if docreate: + if verbose: + print("writing %s krows" % nrows) + if psyco_imported and usepsyco: + psyco.bind(create_db) + nrows *= 1000 + create_db(filename, nrows) + + if doquery: + query_db(filename, rng) diff --git a/bench/pytables_backend.py b/bench/pytables_backend.py new file mode 100644 index 0000000..323db30 --- /dev/null +++ b/bench/pytables_backend.py @@ -0,0 +1,191 @@ +from __future__ import print_function +import os +import tables +from indexed_search 
import DB + + +class PyTables_DB(DB): + + def __init__(self, nrows, rng, userandom, datadir, + docompress=0, complib='zlib', kind="medium", optlevel=6): + DB.__init__(self, nrows, rng, userandom) + self.tprof = [] + # Specific part for pytables + self.docompress = docompress + self.complib = complib + # Complete the filename + self.filename = "pro-" + self.filename + self.filename += '-' + 'O%s' % optlevel + self.filename += '-' + kind + if docompress: + self.filename += '-' + complib + str(docompress) + self.datadir = datadir + if not os.path.isdir(self.datadir): + if not os.path.isabs(self.datadir): + dir_path = os.path.join(os.getcwd(), self.datadir) + else: + dir_path = self.datadir + os.makedirs(dir_path) + self.datadir = dir_path + print("Created {}.".format(self.datadir)) + self.filename = self.datadir + '/' + self.filename + '.h5' + # The chosen filters + self.filters = tables.Filters(complevel=self.docompress, + complib=self.complib, + shuffle=1) + print("Processing database:", self.filename) + + def open_db(self, remove=0): + if remove and os.path.exists(self.filename): + os.remove(self.filename) + con = tables.open_file(self.filename, 'a') + return con + + def close_db(self, con): + # Remove first the table_cache attribute if it exists + if hasattr(self, "table_cache"): + del self.table_cache + con.close() + + def create_table(self, con): + class Record(tables.IsDescription): + col1 = tables.Int32Col() + col2 = tables.Int32Col() + col3 = tables.Float64Col() + col4 = tables.Float64Col() + + con.create_table(con.root, 'table', Record, + filters=self.filters, expectedrows=self.nrows) + + def fill_table(self, con): + "Fills the table" + table = con.root.table + j = 0 + for i in range(0, self.nrows, self.step): + stop = (j + 1) * self.step + if stop > self.nrows: + stop = self.nrows + arr_i4, arr_f8 = self.fill_arrays(i, stop) +# recarr = records.fromarrays([arr_i4, arr_i4, arr_f8, arr_f8]) +# table.append(recarr) + table.append([arr_i4, arr_i4, arr_f8, arr_f8]) + j += 1 + table.flush() + + def index_col(self, con, column, kind, optlevel, verbose): + col = getattr(con.root.table.cols, column) + tmp_dir = os.path.join(self.datadir, "scratch2") + if not os.path.isdir(tmp_dir): + os.makedirs(tmp_dir) + print("Created scratch space.") + col.create_index(kind=kind, optlevel=optlevel, filters=self.filters, + tmp_dir=tmp_dir, _verbose=verbose, _blocksizes=None) +# _blocksizes=(2**27, 2**22, 2**15, 2**7)) +# _blocksizes=(2**27, 2**22, 2**14, 2**6)) +# _blocksizes=(2**27, 2**20, 2**13, 2**5), +# _testmode=True) + + def do_query(self, con, column, base, inkernel): + if True: + if not hasattr(self, "table_cache"): + self.table_cache = table = con.root.table + self.colobj = getattr(table.cols, column) + #self.colobj = getattr(table.cols, 'col1') + self.condvars = {"col": self.colobj, + "col1": table.cols.col1, + "col2": table.cols.col2, + "col3": table.cols.col3, + "col4": table.cols.col4, + } + table = self.table_cache + colobj = self.colobj + else: + table = con.root.table + colobj = getattr(table.cols, column) + self.condvars = {"col": colobj, + "col1": table.cols.col1, + "col2": table.cols.col2, + "col3": table.cols.col3, + "col4": table.cols.col4, + } + self.condvars['inf'] = self.rng[0] + base + self.condvars['sup'] = self.rng[1] + base + # For queries that can use two indexes instead of just one + d = (self.rng[1] - self.rng[0]) / 2. 
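+        # Derived bounds for the two-index query below: [inf1, sup1) is a
+        # half-width integer window anchored one `base` past the range
+        # start, while [inf2, sup2) is the same-width window shifted by a
+        # second `base`.  Illustrative values (not from the benchmark):
+        # rng=[3, 5] and base=2 give inf1=5, sup1=6, inf2=7, sup2=8.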
+        inf1 = int(self.rng[0] + base)
+        sup1 = int(self.rng[0] + d + base)
+        inf2 = self.rng[0] + base * 2
+        sup2 = self.rng[0] + d + base * 2
+        self.condvars['inf1'] = inf1
+        self.condvars['sup1'] = sup1
+        self.condvars['inf2'] = inf2
+        self.condvars['sup2'] = sup2
+        #condition = "(inf == col2)"
+        #condition = "(inf==col2) & (col4==sup)"
+        #condition = "(inf==col2) | (col4==sup)"
+        #condition = "(inf==col2) | (col2==sup)"
+        #condition = "(inf==col2) & (col3==sup)"
+        #condition = "((inf==col2) & (sup==col4)) & (col3==sup)"
+        #condition = "((inf==col1) & (sup==col4)) & (col3==sup)"
+        #condition = "(inf<=col1) & (col3<sup)"
+        #print("lims-->", inf1, inf2, sup1, sup2)
+        condition = "((inf2<=col) & (col<sup2)) | ((inf1<col) & (col<sup1))"
+        #print("lims-->", c['inf'], c['sup'], c['inf2'], c['sup2'])
+        ncoords = 0
+        if colobj.is_indexed:
+            results = [r[column]
+                       for r in table.where(condition, self.condvars)]
+#            coords = table.get_where_list(condition, self.condvars)
+#            results = table.read_coordinates(coords, field=column)
+
+#            results = table.read_where(condition, self.condvars, field=column)
+
+        elif inkernel:
+            print("Performing in-kernel query")
+            results = [r[column]
+                       for r in table.where(condition, self.condvars)]
+            #coords = [r.nrow for r in table.where(condition, self.condvars)]
+            #results = table.read_coordinates(coords)
+#            for r in table.where(condition, self.condvars):
+#                var = r[column]
+#                ncoords += 1
+        else:
+#            coords = [r.nrow for r in table
+#                      if (self.rng[0]+base <= r[column] <= self.rng[1]+base)]
+#            results = table.read_coordinates(coords)
+            print("Performing regular query")
+            results = [
+                r[column] for r in table if ((
+                    (inf2 <= r['col4']) and (r['col4'] < sup2)) or
+                    ((inf1 < r['col2']) and (r['col2'] < sup1)) and
+                    ((r['col1'] + 3.1 * r['col2'] + r['col3'] * r['col4']) > 3)
+                )]
+
+        ncoords = len(results)
+
+        # return coords
+        # print "results-->", results
+        # return results
+        return ncoords
+        #self.tprof.append( self.colobj.index.tprof )
+        # return ncoords, self.tprof
diff --git a/bench/recarray2-test.py b/bench/recarray2-test.py
new file mode 100644
index 0000000..e4affe2
--- /dev/null
+++ b/bench/recarray2-test.py
@@ -0,0 +1,106 @@
+from __future__ import print_function
+import os
+import sys
+import time
+import numpy as np
+import chararray
+import recarray
+import recarray2  # This is my modified version
+
+usage = """usage: %s recordlength
+    Set recordlength to 1000 at least to obtain decent figures!
+""" % sys.argv[0] + +try: + reclen = int(sys.argv[1]) +except: + print(usage) + sys.exit() + +delta = 0.000001 + +# Creation of recarrays objects for test +x1 = np.array(np.arange(reclen)) +x2 = chararray.array(None, itemsize=7, shape=reclen) +x3 = np.array(np.arange(reclen, reclen * 3, 2), np.Float64) +r1 = recarray.fromarrays([x1, x2, x3], names='a,b,c') +r2 = recarray2.fromarrays([x1, x2, x3], names='a,b,c') + +print("recarray shape in test ==>", r2.shape) + +print("Assignment in recarray original") +print("-------------------------------") +t1 = time.clock() +for row in range(reclen): + #r1.field("b")[row] = "changed" + r1.field("c")[row] = float(row ** 2) +t2 = time.clock() +origtime = round(t2 - t1, 3) +print("Assign time:", origtime, " Rows/s:", int(reclen / (origtime + delta))) +# print "Field b on row 2 after re-assign:", r1.field("c")[2] +print() + +print("Assignment in recarray modified") +print("-------------------------------") +t1 = time.clock() +for row in range(reclen): + rec = r2._row(row) # select the row to be changed + # rec.b = "changed" # change the "b" field + rec.c = float(row ** 2) # Change the "c" field +t2 = time.clock() +ttime = round(t2 - t1, 3) +print("Assign time:", ttime, " Rows/s:", int(reclen / (ttime + delta)), + end=' ') +print(" Speed-up:", round(origtime / ttime, 3)) +# print "Field b on row 2 after re-assign:", r2.field("c")[2] +print() + +print("Selection in recarray original") +print("------------------------------") +t1 = time.clock() +for row in range(reclen): + rec = r1[row] + if rec.field("a") < 3: + print("This record pass the cut ==>", rec.field("c"), "(row", row, ")") +t2 = time.clock() +origtime = round(t2 - t1, 3) +print("Select time:", origtime, " Rows/s:", int(reclen / (origtime + delta))) +print() + +print("Selection in recarray modified") +print("------------------------------") +t1 = time.clock() +for row in range(reclen): + rec = r2._row(row) + if rec.a < 3: + print("This record pass the cut ==>", rec.c, "(row", row, ")") +t2 = time.clock() +ttime = round(t2 - t1, 3) +print("Select time:", ttime, " Rows/s:", int(reclen / (ttime + delta)), + end=' ') +print(" Speed-up:", round(origtime / ttime, 3)) +print() + +print("Printing in recarray original") +print("------------------------------") +f = open("test.out", "w") +t1 = time.clock() +f.write(str(r1)) +t2 = time.clock() +origtime = round(t2 - t1, 3) +f.close() +os.unlink("test.out") +print("Print time:", origtime, " Rows/s:", int(reclen / (origtime + delta))) +print() +print("Printing in recarray modified") +print("------------------------------") +f = open("test2.out", "w") +t1 = time.clock() +f.write(str(r2)) +t2 = time.clock() +ttime = round(t2 - t1, 3) +f.close() +os.unlink("test2.out") +print("Print time:", ttime, " Rows/s:", int(reclen / (ttime + delta)), end=' ') +print(" Speed-up:", round(origtime / ttime, 3)) +print() diff --git a/bench/search-bench-plot.py b/bench/search-bench-plot.py new file mode 100644 index 0000000..9dc4a87 --- /dev/null +++ b/bench/search-bench-plot.py @@ -0,0 +1,148 @@ +from __future__ import print_function +import tables +from pylab import * + + +def get_values(filename, complib=''): + f = tables.open_file(filename) + nrows = f.root.small.create_best.cols.nrows[:] + corrected_sizes = nrows / 10. ** 6 + if mb_units: + corrected_sizes = 16 * nrows / 10. 
** 6
+    if insert:
+        values = corrected_sizes / f.root.small.create_best.cols.tfill[:]
+    if table_size:
+        values = f.root.small.create_best.cols.fsize[:] / nrows
+    if query:
+        values = corrected_sizes / \
+            f.root.small.search_best.inkernel.int.cols.time1[:]
+    if query_cache:
+        values = corrected_sizes / \
+            f.root.small.search_best.inkernel.int.cols.time2[:]
+
+    f.close()
+    return nrows, values
+
+
+def show_plot(plots, yaxis, legends, gtitle):
+    xlabel('Number of rows')
+    ylabel(yaxis)
+    xlim(10 ** 3, 10 ** 8)
+    title(gtitle)
+    grid(True)
+
+#    legends = [f[f.find('-'):f.index('.out')] for f in filenames]
+#    legends = [l.replace('-', ' ') for l in legends]
+    if table_size:
+        legend([p[0] for p in plots], legends, loc="upper right")
+    else:
+        legend([p[0] for p in plots], legends, loc="upper left")
+
+    #subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2)
+    if outfile:
+        savefig(outfile)
+    else:
+        show()
+
+if __name__ == '__main__':
+
+    import sys
+    import getopt
+
+    usage = """usage: %s [-o file] [-t title] [--insert] [--table-size] [--query] [--query-cache] [--MB-units] files
+    -o filename for output (only .png and .jpg extensions supported)
+    -t title of the plot
+    --insert -- Insert time for table
+    --table-size -- Size of table
+    --query -- Time for querying the integer column
+    --query-cache -- Time for querying the integer (cached)
+    --MB-units -- Express speed in MB/s instead of MRows/s
+    \n""" % sys.argv[0]
+
+    try:
+        opts, pargs = getopt.getopt(sys.argv[1:], 'o:t:',
+                                    ['insert',
+                                     'table-size',
+                                     'query',
+                                     'query-cache',
+                                     'MB-units',
+                                     ])
+    except getopt.GetoptError:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    progname = sys.argv[0]
+    args = sys.argv[1:]
+
+    # if we pass too few parameters, abort
+    if len(pargs) < 1:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # default options
+    outfile = None
+    insert = 0
+    table_size = 0
+    query = 0
+    query_cache = 0
+    mb_units = 0
+    yaxis = "No axis name"
+    tit = None
+    gtitle = "Please set a title!"
+
+    # Get the options
+    for option in opts:
+        if option[0] == '-o':
+            outfile = option[1]
+        elif option[0] == '-t':
+            tit = option[1]
+        elif option[0] == '--insert':
+            insert = 1
+            yaxis = "MRows/s"
+            gtitle = "Writing with small (16 bytes) record size"
+        elif option[0] == '--table-size':
+            table_size = 1
+            yaxis = "Bytes/row"
+            gtitle = ("Disk space taken by a record (original record size: "
+                      "16 bytes)")
+        elif option[0] == '--query':
+            query = 1
+            yaxis = "MRows/s"
+            gtitle = ("Selecting with small (16 bytes) record size (file not "
+                      "in cache)")
+        elif option[0] == '--query-cache':
+            query_cache = 1
+            yaxis = "MRows/s"
+            gtitle = ("Selecting with small (16 bytes) record size (file in "
+                      "cache)")
+        elif option[0] == '--MB-units':
+            mb_units = 1
+
+    filenames = pargs
+
+    if mb_units and yaxis == "MRows/s":
+        yaxis = "MB/s"
+
+    if tit:
+        gtitle = tit
+
+    plots = []
+    legends = []
+    for filename in filenames:
+        plegend = filename[filename.find('cl-') + 3:filename.index('.h5')]
+        plegend = plegend.replace('-', ' ')
+        xval, yval = get_values(filename, '')
+        print("Values for %s --> %s, %s" % (filename, xval, yval))
+        #plots.append(loglog(xval, yval, linewidth=5))
+        plots.append(semilogx(xval, yval, linewidth=4))
+        legends.append(plegend)
+    if 0:  # To introduce simulated data, if desired...
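+        # Dead branch kept for convenience: switch `if 0:` to `if 1:` to
+        # overlay the hard-coded reference curve below (labelled
+        # "PyTables Std") on top of the measured ones.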
+ xval = [1000, 10000, 100000, 1000000, 10000000, + 100000000, 1000000000] +# yval = [0.003, 0.005, 0.02, 0.06, 1.2, +# 40, 210] + yval = [0.0009, 0.0011, 0.0022, 0.005, 0.02, + 0.2, 5.6] + plots.append(loglog(xval, yval, linewidth=5)) + legends.append("PyTables Std") + show_plot(plots, yaxis, legends, gtitle) diff --git a/bench/search-bench-rnd.sh b/bench/search-bench-rnd.sh new file mode 100755 index 0000000..db24750 --- /dev/null +++ b/bench/search-bench-rnd.sh @@ -0,0 +1,122 @@ +#!/bin/sh +# I don't know why, but the /usr/bin/python2.3 from Debian is a 30% slower +# than my own compiled version! 2004-08-18 +python="/usr/local/bin/python2.3 -O" + +writedata () { + nrows=$1 + bfile=$2 + worst=$3 + psyco=$4 + if [ "$shuffle" = "1" ]; then + shufflef="-S" + else + shufflef="" + fi + cmd="${python} search-bench.py -R ${worst} -b ${bfile} -h ${psyco} -l ${libcomp} -c ${complevel} ${shufflef} -w -n ${nrows} data.nobackup/bench-${libcomp}-${nrows}k.h5" + echo ${cmd} + ${cmd} +} + +readdata () { + nrows=$1 + bfile=$2 + worst=$3 + psyco=$4 + smode=$5 + + if [ "$smode" = "indexed" ]; then + #repeats=100 + repeats=20 + else + repeats=2 + fi + cmd="${python} search-bench.py -R ${worst} -h -b ${bfile} ${psyco} -m ${smode} -r -k ${repeats} data.nobackup/bench-${libcomp}-${nrows}k.h5" + echo ${cmd} + ${cmd} + return +} + +overwrite=0 +if [ $# > 1 ]; then + if [ "$1" = "-o" ]; then + overwrite=1 + fi +fi +if [ $# > 2 ]; then + psyco=$2 +fi + +# Configuration for testing +#nrowslist="50000" +#nrowslistworst="50000" + +# Normal test +#nrowslist="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000" +#nrowslistworst="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000" +nrowslist="1 2 5 10 20 50 100 200 500 1000" +nrowslistworst="1 2 5 10 20 50 100 200 500 1000" +#nrowslist="1 2 5 10" +#nrowslistworst="1 2 5 10" + +# The next can be regarded as parameters +shuffle=1 + +for libcomp in none zlib lzo; do +#for libcomp in none lzo; do + if [ "$libcomp" = "none" ]; then + complevel=0 + else + complevel=1 + fi + # The name of the data bench file + bfile="worst-dbench-cl-${libcomp}-c${complevel}-S${shuffle}.h5" + + # Move out a possible previous benchmark file + bn=`basename $bfile ".h5"` + mv -f ${bn}-bck2.h5 ${bn}-bck3.h5 + mv -f ${bn}-bck.h5 ${bn}-bck2.h5 + if [ "$overwrite" = "1" ]; then + echo "moving ${bn}.h5 to ${bn}-bck.h5" + mv -f ${bn}.h5 ${bn}-bck.h5 + else + echo "copying ${bn}.h5 to ${bn}-bck.h5" + cp -f ${bn}.h5 ${bn}-bck.h5 + fi + for worst in "" -t; do + #for worst in ""; do + # Write data files + if [ "$worst" = "-t" ]; then + echo + echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo "Entering worst case..." 
+ echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo + nrowslist=$nrowslistworst + fi + # Write data file + for nrows in $nrowslist; do + echo "*************************************************************" + echo "Writing for nrows=$nrows Krows, psyco=$psyco, worst='${worst}'" + echo "*************************************************************" + writedata ${nrows} ${bfile} "${worst}" "${psyco}" + done + # Read data files + for smode in indexed inkernel standard; do + ${python} cacheout.py + for nrows in $nrowslist; do + echo "***********************************************************" + echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco, worst='${worst}'" + echo "***********************************************************" + readdata ${nrows} ${bfile} "${worst}" "${psyco}" "${smode}" + done + done + # Finally, after the final search, delete the source (if desired) +# for nrows in $nrowslist; do +# rm -f data.nobackup/bench-${libcomp}-${nrows}k.h5 +# done + done + echo "New data available on: $bfile" +done + +exit 0 diff --git a/bench/search-bench.py b/bench/search-bench.py new file mode 100644 index 0000000..a56f738 --- /dev/null +++ b/bench/search-bench.py @@ -0,0 +1,524 @@ +#!/usr/bin/env python + +from __future__ import print_function +import sys +import math +import time +import random +import warnings +import os + +import numpy + +from tables import * + +# Initialize the random generator always with the same integer +# in order to have reproductible results +random.seed(19) +numpy.random.seed(19) + +randomvalues = 0 +worst = 0 + +Small = { + "var1": StringCol(itemsize=4, dflt="Hi!", pos=2), + "var2": Int32Col(pos=1), + "var3": Float64Col(pos=0), + #"var4" : BoolCol(), +} + + +def createNewBenchFile(bfile, verbose): + + class Create(IsDescription): + nrows = Int32Col(pos=0) + irows = Int32Col(pos=1) + tfill = Float64Col(pos=2) + tidx = Float64Col(pos=3) + tcfill = Float64Col(pos=4) + tcidx = Float64Col(pos=5) + rowsecf = Float64Col(pos=6) + rowseci = Float64Col(pos=7) + fsize = Float64Col(pos=8) + isize = Float64Col(pos=9) + psyco = BoolCol(pos=10) + + class Search(IsDescription): + nrows = Int32Col(pos=0) + rowsel = Int32Col(pos=1) + time1 = Float64Col(pos=2) + time2 = Float64Col(pos=3) + tcpu1 = Float64Col(pos=4) + tcpu2 = Float64Col(pos=5) + rowsec1 = Float64Col(pos=6) + rowsec2 = Float64Col(pos=7) + psyco = BoolCol(pos=8) + + if verbose: + print("Creating a new benchfile:", bfile) + # Open the benchmarking file + bf = open_file(bfile, "w") + # Create groups + for recsize in ["small"]: + group = bf.create_group("/", recsize, recsize + " Group") + # Attach the row size of table as attribute + if recsize == "small": + group._v_attrs.rowsize = 16 + # Create a Table for writing bench + bf.create_table(group, "create_best", Create, "best case") + bf.create_table(group, "create_worst", Create, "worst case") + for case in ["best", "worst"]: + # create a group for searching bench (best case) + groupS = bf.create_group(group, "search_" + case, "Search Group") + # Create Tables for searching + for mode in ["indexed", "inkernel", "standard"]: + groupM = bf.create_group(groupS, mode, mode + " Group") + # for searching bench + # for atom in ["string", "int", "float", "bool"]: + for atom in ["string", "int", "float"]: + bf.create_table(groupM, atom, Search, atom + " bench") + bf.close() + + +def createFile(filename, nrows, filters, index, heavy, noise, verbose): + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="Searchsorted 
Benchmark", + filters=filters) + rowswritten = 0 + + # Create the test table + table = fileh.create_table(fileh.root, 'table', Small, "test table", + None, nrows) + + t1 = time.time() + cpu1 = time.clock() + nrowsbuf = table.nrowsinbuf + minimum = 0 + maximum = nrows + for i in range(0, nrows, nrowsbuf): + if i + nrowsbuf > nrows: + j = nrows + else: + j = i + nrowsbuf + if randomvalues: + var3 = numpy.random.uniform(minimum, maximum, size=j - i) + else: + var3 = numpy.arange(i, j, dtype=numpy.float64) + if noise > 0: + var3 += numpy.random.uniform(-noise, noise, size=j - i) + var2 = numpy.array(var3, dtype=numpy.int32) + var1 = numpy.empty(shape=[j - i], dtype="S4") + if not heavy: + var1[:] = var2 + table.append([var3, var2, var1]) + table.flush() + rowswritten += nrows + time1 = time.time() - t1 + tcpu1 = time.clock() - cpu1 + print("Time for filling:", round(time1, 3), + "Krows/s:", round(nrows / 1000. / time1, 3), end=' ') + fileh.close() + size1 = os.stat(filename)[6] + print(", File size:", round(size1 / (1024. * 1024.), 3), "MB") + fileh = open_file(filename, mode="a", title="Searchsorted Benchmark", + filters=filters) + table = fileh.root.table + rowsize = table.rowsize + if index: + t1 = time.time() + cpu1 = time.clock() + # Index all entries + if not heavy: + indexrows = table.cols.var1.create_index(filters=filters) + for colname in ['var2', 'var3']: + table.colinstances[colname].create_index(filters=filters) + time2 = time.time() - t1 + tcpu2 = time.clock() - cpu1 + print("Time for indexing:", round(time2, 3), + "iKrows/s:", round(indexrows / 1000. / time2, 3), end=' ') + else: + indexrows = 0 + time2 = 0.0000000001 # an ugly hack + tcpu2 = 0. + + if verbose: + if index: + idx = table.cols.var1.index + print("Index parameters:", repr(idx)) + else: + print("NOT indexing rows") + # Close the file + fileh.close() + + size2 = os.stat(filename)[6] - size1 + if index: + print(", Index size:", round(size2 / (1024. * 1024.), 3), "MB") + return (rowswritten, indexrows, rowsize, time1, time2, + tcpu1, tcpu2, size1, size2) + + +def benchCreate(file, nrows, filters, index, bfile, heavy, + psyco, noise, verbose): + + # Open the benchfile in append mode + bf = open_file(bfile, "a") + recsize = "small" + if worst: + table = bf.get_node("/" + recsize + "/create_worst") + else: + table = bf.get_node("/" + recsize + "/create_best") + + (rowsw, irows, rowsz, time1, time2, tcpu1, tcpu2, size1, size2) = \ + createFile(file, nrows, filters, index, heavy, noise, verbose) + # Collect data + table.row["nrows"] = rowsw + table.row["irows"] = irows + table.row["tfill"] = time1 + table.row["tidx"] = time2 + table.row["tcfill"] = tcpu1 + table.row["tcidx"] = tcpu2 + table.row["fsize"] = size1 + table.row["isize"] = size2 + table.row["psyco"] = psyco + tapprows = round(time1, 3) + cpuapprows = round(tcpu1, 3) + tpercent = int(round(cpuapprows / tapprows, 2) * 100) + print("Rows written:", rowsw, " Row size:", rowsz) + print("Time writing rows: %s s (real) %s s (cpu) %s%%" % + (tapprows, cpuapprows, tpercent)) + rowsecf = rowsw / tapprows + table.row["rowsecf"] = rowsecf + # print "Write rows/sec: ", rowsecf + print("Total file size:", + round((size1 + size2) / (1024. 
* 1024.), 3), "MB", end=' ')
+    print(", Write KB/s (pure data):", int(rowsw * rowsz / (tapprows * 1024)))
+    # print "Write KB/s :", int((size1+size2) / ((time1+time2) * 1024))
+    tidxrows = time2
+    cpuidxrows = round(tcpu2, 3)
+    tpercent = int(round(cpuidxrows / tidxrows, 2) * 100)
+    print("Rows indexed:", irows, " (IMRows):", irows / float(10 ** 6))
+    print("Time indexing rows: %s s (real) %s s (cpu) %s%%" %
+          (round(tidxrows, 3), cpuidxrows, tpercent))
+    rowseci = irows / tidxrows
+    table.row["rowseci"] = rowseci
+    table.row.append()
+    bf.close()
+
+
+def readFile(filename, atom, riter, indexmode, dselect, verbose):
+    # Open the HDF5 file in read-only mode
+
+    fileh = open_file(filename, mode="r")
+    table = fileh.root.table
+    var1 = table.cols.var1
+    var2 = table.cols.var2
+    var3 = table.cols.var3
+    if indexmode == "indexed":
+        if var2.index.nelements > 0:
+            where = table._whereIndexed
+        else:
+            warnings.warn(
+                "Table is not indexed or the index is empty. Defaulting to "
+                "in-kernel selection")
+            indexmode = "inkernel"
+            where = table._whereInRange
+    elif indexmode == "inkernel":
+        where = table.where
+    if verbose:
+        print("Max rows in buf:", table.nrowsinbuf)
+        print("Rows in", table._v_pathname, ":", table.nrows)
+        print("Buffersize:", table.rowsize * table.nrowsinbuf)
+        print("MaxTuples:", table.nrowsinbuf)
+        if indexmode == "indexed":
+            print("Chunk size:", var2.index.sorted.chunksize)
+            print("Number of elements per slice:", var2.index.nelemslice)
+            print("Slice number in", table._v_pathname, ":", var2.index.nrows)
+
+    #table.nrowsinbuf = 10
+    # print "nrowsinbuf-->", table.nrowsinbuf
+    rowselected = 0
+    time2 = 0.
+    tcpu2 = 0.
+    results = []
+    print("Select mode:", indexmode, ". Selecting for type:", atom)
+    # Initialize the random generator always with the same integer
+    # in order to have reproducible results on each read iteration
+    random.seed(19)
+    numpy.random.seed(19)
+    for i in range(riter):
+        # The interval to look values in. This is approximately equivalent
+        # to the number of elements to select
+        rnd = numpy.random.randint(table.nrows)
+        cpu1 = time.clock()
+        t1 = time.time()
+        if atom == "string":
+            val = str(rnd)[-4:]
+            if indexmode in ["indexed", "inkernel"]:
+                results = [p.nrow
+                           for p in where('var1 == val')]
+            else:
+                results = [p.nrow for p in table
+                           if p["var1"] == val]
+        elif atom == "int":
+            val = rnd + dselect
+            if indexmode in ["indexed", "inkernel"]:
+                results = [p.nrow
+                           for p in where('(rnd <= var2) & (var2 < val)')]
+            else:
+                results = [p.nrow for p in table
+                           if rnd <= p["var2"] < val]
+        elif atom == "float":
+            val = rnd + dselect
+            if indexmode in ["indexed", "inkernel"]:
+                t1 = time.time()
+                results = [p.nrow
+                           for p in where('(rnd <= var3) & (var3 < val)')]
+            else:
+                results = [p.nrow for p in table
+                           if float(rnd) <= p["var3"] < float(val)]
+        else:
+            raise ValueError("Value for atom '%s' not supported."
% atom) + rowselected += len(results) + # print "selected values-->", results + if i == 0: + # First iteration + time1 = time.time() - t1 + tcpu1 = time.clock() - cpu1 + else: + if indexmode == "indexed": + # if indexed, wait until the 5th iteration (in order to + # insure that the index is effectively cached) to take times + if i >= 5: + time2 += time.time() - t1 + tcpu2 += time.clock() - cpu1 + else: + time2 += time.time() - t1 + tcpu2 += time.clock() - cpu1 + + if riter > 1: + if indexmode == "indexed" and riter >= 5: + correction = 5 + else: + correction = 1 + time2 = time2 / (riter - correction) + tcpu2 = tcpu2 / (riter - correction) + if verbose and 1: + print("Values that fullfill the conditions:") + print(results) + + #rowsread = table.nrows * riter + rowsread = table.nrows + rowsize = table.rowsize + + # Close the file + fileh.close() + + return (rowsread, rowselected, rowsize, time1, time2, tcpu1, tcpu2) + + +def benchSearch(file, riter, indexmode, bfile, heavy, psyco, dselect, verbose): + + # Open the benchfile in append mode + bf = open_file(bfile, "a") + recsize = "small" + if worst: + tableparent = "/" + recsize + "/search_worst/" + indexmode + "/" + else: + tableparent = "/" + recsize + "/search_best/" + indexmode + "/" + + # Do the benchmarks + if not heavy: + #atomlist = ["string", "int", "float", "bool"] + atomlist = ["string", "int", "float"] + else: + #atomlist = ["int", "float", "bool"] + atomlist = ["int", "float"] + for atom in atomlist: + tablepath = tableparent + atom + table = bf.get_node(tablepath) + (rowsr, rowsel, rowssz, time1, time2, tcpu1, tcpu2) = \ + readFile(file, atom, riter, indexmode, dselect, verbose) + row = table.row + row["nrows"] = rowsr + row["rowsel"] = rowsel + treadrows = round(time1, 6) + row["time1"] = time1 + treadrows2 = round(time2, 6) + row["time2"] = time2 + cpureadrows = round(tcpu1, 6) + row["tcpu1"] = tcpu1 + cpureadrows2 = round(tcpu2, 6) + row["tcpu2"] = tcpu2 + row["psyco"] = psyco + tpercent = int(round(cpureadrows / treadrows, 2) * 100) + if riter > 1: + tpercent2 = int(round(cpureadrows2 / treadrows2, 2) * 100) + else: + tpercent2 = 0. + tMrows = rowsr / (1000 * 1000.) + sKrows = rowsel / 1000. + if atom == "string": # just to print once + print("Rows read:", rowsr, "Mread:", round(tMrows, 6), "Mrows") + print("Rows selected:", rowsel, "Ksel:", round(sKrows, 6), "Krows") + print("Time selecting (1st time): %s s (real) %s s (cpu) %s%%" % + (treadrows, cpureadrows, tpercent)) + if riter > 1: + print("Time selecting (cached): %s s (real) %s s (cpu) %s%%" % + (treadrows2, cpureadrows2, tpercent2)) + #rowsec1 = round(rowsr / float(treadrows), 6)/10**6 + rowsec1 = rowsr / treadrows + row["rowsec1"] = rowsec1 + print("Read Mrows/sec: ", end=' ') + print(round(rowsec1 / 10. ** 6, 6), "(first time)", end=' ') + if riter > 1: + rowsec2 = rowsr / treadrows2 + row["rowsec2"] = rowsec2 + print(round(rowsec2 / 10. 
** 6, 6), "(cache time)") + else: + print() + # Append the info to the table + row.append() + table.flush() + # Close the benchmark file + bf.close() + + +if __name__ == "__main__": + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-v] [-p] [-R] [-r] [-w] [-c level] [-l complib] [-S] [-F] [-n nrows] [-x] [-b file] [-t] [-h] [-k riter] [-m indexmode] [-N range] [-d range] datafile + -v verbose + -p use "psyco" if available + -R use Random values for filling + -r only read test + -w only write test + -c sets a compression level (do not set it or 0 for no compression) + -l sets the compression library ("zlib", "lzo", "ucl", "bzip2" or "none") + -S activate shuffling filter + -F activate fletcher32 filter + -n set the number of rows in tables (in krows) + -x don't make indexes + -b bench filename + -t worsT searching case + -h heavy benchmark (operations without strings) + -m index mode for reading ("indexed" | "inkernel" | "standard") + -N introduce (uniform) noise within range into the values + -d the interval for look values (int, float) at. Default is 3. + -k number of iterations for reading\n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt( + sys.argv[1:], 'vpSFRrowxthk:b:c:l:n:m:N:d:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + dselect = 3. + noise = 0. + verbose = 0 + fieldName = None + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + shuffle = 0 + fletcher32 = 0 + complib = "zlib" + nrows = 1000 + index = 1 + heavy = 0 + bfile = "bench.h5" + supported_imodes = ["indexed", "inkernel", "standard"] + indexmode = "inkernel" + riter = 1 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + if option[0] == '-p': + usepsyco = 1 + if option[0] == '-R': + randomvalues = 1 + if option[0] == '-S': + shuffle = 1 + if option[0] == '-F': + fletcher32 = 1 + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-x': + index = 0 + elif option[0] == '-h': + heavy = 1 + elif option[0] == '-t': + worst = 1 + elif option[0] == '-b': + bfile = option[1] + elif option[0] == '-c': + complevel = int(option[1]) + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-m': + indexmode = option[1] + if indexmode not in supported_imodes: + raise ValueError( + "Indexmode should be any of '%s' and you passed '%s'" % + (supported_imodes, indexmode)) + elif option[0] == '-n': + nrows = int(float(option[1]) * 1000) + elif option[0] == '-N': + noise = float(option[1]) + elif option[0] == '-d': + dselect = float(option[1]) + elif option[0] == '-k': + riter = int(option[1]) + + if worst: + nrows -= 1 # the worst case + + if complib == "none": + # This means no compression at all + complib = "zlib" # just to make PyTables not complaining + complevel = 0 + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + # Build the Filters instance + filters = Filters(complevel=complevel, complib=complib, + shuffle=shuffle, fletcher32=fletcher32) + + # Create the benchfile (if needed) + if not os.path.exists(bfile): + createNewBenchFile(bfile, verbose) + + if testwrite: + if verbose: + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if shuffle: + print("Suffling...") + if psyco_imported and usepsyco: + psyco.bind(createFile) + benchCreate(file, nrows, 
filters, index, bfile, heavy,
+                    usepsyco, noise, verbose)
+    if testread:
+        if psyco_imported and usepsyco:
+            psyco.bind(readFile)
+        benchSearch(file, riter, indexmode, bfile, heavy, usepsyco,
+                    dselect, verbose)
diff --git a/bench/search-bench.sh b/bench/search-bench.sh
new file mode 100755
index 0000000..402bd77
--- /dev/null
+++ b/bench/search-bench.sh
@@ -0,0 +1,123 @@
+#!/bin/sh
+python="python2.5 -O"
+
+writedata () {
+    nrows=$1
+    bfile=$2
+    heavy=$3
+    psyco=$4
+    if [ "$shuffle" = "1" ]; then
+        shufflef="-S"
+    else
+        shufflef=""
+    fi
+    cmd="${python} search-bench.py -b ${bfile} ${heavy} ${psyco} -l ${libcomp} -c ${complevel} ${shufflef} -w -n ${nrows} -x data.nobackup/bench-${libcomp}-${nrows}k.h5"
+    echo ${cmd}
+    ${cmd}
+}
+
+readdata () {
+    nrows=$1
+    bfile=$2
+    heavy=$3
+    psyco=$4
+    smode=$5
+
+    if [ "$smode" = "indexed" ]; then
+        repeats=100
+    else
+        repeats=2
+    fi
+    if [ "$heavy" = "-h" -a "$smode" = "standard" ]; then
+        # For heavy mode don't do a standard search
+        echo "Skipping the standard search for heavy mode"
+    else
+        cmd="${python} search-bench.py -b ${bfile} ${heavy} ${psyco} -m ${smode} -r -k ${repeats} data.nobackup/bench-${libcomp}-${nrows}k.h5"
+        echo ${cmd}
+        ${cmd}
+    fi
+    if [ "$smode" = "standard" -a "1" = "0" ]; then
+        # Finally, after the final search, delete the source (if desired)
+        rm -f data.nobackup/bench-${libcomp}-${nrows}k.h5
+    fi
+    return
+}
+
+overwrite=0
+if [ $# -ge 1 ]; then
+    if [ "$1" = "-o" ]; then
+        overwrite=1
+    fi
+fi
+if [ $# -ge 2 ]; then
+    psyco=$2
+fi
+# The next can be regarded as parameters
+libcomp="lzo"
+complevel=1
+shuffle=1
+
+# The name of the data bench file
+bfile="dbench-cl-${libcomp}-c${complevel}-S${shuffle}.h5"
+
+# Move out a possible previous benchmark file
+bn=`basename $bfile ".h5"`
+mv -f ${bn}-bck2.h5 ${bn}-bck3.h5
+mv -f ${bn}-bck.h5 ${bn}-bck2.h5
+if [ "$overwrite" = "1" ]; then
+    echo "moving ${bn}.h5 to ${bn}-bck.h5"
+    mv -f ${bn}.h5 ${bn}-bck.h5
+else
+    echo "copying ${bn}.h5 to ${bn}-bck.h5"
+    cp -f ${bn}.h5 ${bn}-bck.h5
+fi
+
+# Configuration for testing
+nrowslist="1 2"
+nrowslistheavy="5 10"
+# This config takes 10 minutes to complete (psyco, zlib)
+#nrowslist="1 2 5 10 20 50 100 200 500 1000"
+#nrowslistheavy="2000 5000 10000"
+#nrowslist=""
+#nrowslistheavy="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000 100000"
+
+# Normal test
+#nrowslist="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+#nrowslistheavy="20000 50000 100000 200000 500000 1000000"
+# Big test
+#nrowslist="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+#nrowslistheavy="20000 50000 100000 200000 500000 1000000 2000000 5000000"
+
+for heavy in "" -h; do
+    # Write data files (light mode)
+    if [ "$heavy" = "-h" ]; then
+        echo
+        echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+        echo "Entering heavy mode..."
+ echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo + nrowslist=$nrowslistheavy + fi + # Write data file + for nrows in $nrowslist; do + echo "*************************************************************" + echo "Writing for nrows=$nrows Krows, psyco=$psyco, heavy='${heavy}'" + echo "*************************************************************" + writedata ${nrows} ${bfile} "${heavy}" "${psyco}" + done + # Read data files + #for smode in indexed inkernel standard; do + for smode in inkernel standard; do +# for smode in indexed; do + ${python} cacheout.py + for nrows in $nrowslist; do + echo "***********************************************************" + echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco, heavy='${heavy}'" + echo "***********************************************************" + readdata ${nrows} ${bfile} "${heavy}" "${psyco}" "${smode}" + done + done +done + +echo "New data available on: $bfile" +exit 0 diff --git a/bench/searchsorted-bench.py b/bench/searchsorted-bench.py new file mode 100644 index 0000000..f595de5 --- /dev/null +++ b/bench/searchsorted-bench.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python + +from __future__ import print_function +import time +from tables import * + + +class Small(IsDescription): + var1 = StringCol(itemsize=4) + var2 = Int32Col() + var3 = Float64Col() + var4 = BoolCol() + +# Define a user record to characterize some kind of particles + + +class Medium(IsDescription): + var1 = StringCol(itemsize=16) # 16-character String + #float1 = Float64Col(dflt=2.3) + #float2 = Float64Col(dflt=2.3) + # zADCcount = Int16Col() # signed short integer + var2 = Int32Col() # signed short integer + var3 = Float64Col() + grid_i = Int32Col() # integer + grid_j = Int32Col() # integer + pressure = Float32Col() # float (single-precision) + energy = Float64Col(shape=2) # double (double-precision) + + +def createFile(filename, nrows, filters, atom, recsize, index, verbose): + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="Searchsorted Benchmark", + filters=filters) + title = "This is the IndexArray title" + # Create an IndexArray instance + rowswritten = 0 + # Create an entry + klass = {"small": Small, "medium": Medium} + table = fileh.create_table(fileh.root, 'table', klass[recsize], title, + None, nrows) + for i in range(nrows): + #table.row['var1'] = str(i) + #table.row['var2'] = random.randrange(nrows) + table.row['var2'] = i + table.row['var3'] = i + #table.row['var4'] = i % 2 + #table.row['var4'] = i > 2 + table.row.append() + rowswritten += nrows + table.flush() + rowsize = table.rowsize + indexrows = 0 + + # Index one entry: + if index: + if atom == "string": + indexrows = table.cols.var1.create_index() + elif atom == "bool": + indexrows = table.cols.var4.create_index() + elif atom == "int": + indexrows = table.cols.var2.create_index() + elif atom == "float": + indexrows = table.cols.var3.create_index() + else: + raise ValueError("Index type not supported yet") + if verbose: + print("Number of indexed rows:", indexrows) + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowswritten, rowsize) + + +def readFile(filename, atom, niter, verbose): + # Open the HDF5 file in read-only mode + + fileh = open_file(filename, mode="r") + table = fileh.root.table + print("reading", table) + if atom == "string": + idxcol = table.cols.var1.index + elif atom == "bool": + idxcol = table.cols.var4.index + elif atom == "int": + idxcol = table.cols.var2.index + else: + idxcol = 
table.cols.var3.index + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + print("Chunk size:", idxcol.sorted.chunksize) + print("Number of elements per slice:", idxcol.nelemslice) + print("Slice number in", table._v_pathname, ":", idxcol.nrows) + + rowselected = 0 + if atom == "string": + for i in range(niter): + #results = [table.row["var3"] for i in table.where(2+i<=table.cols.var2 < 10+i)] + #results = [table.row.nrow() for i in table.where(2<=table.cols.var2 < 10)] + results = [p["var1"] # p.nrow() + for p in table.where(table.cols.var1 == "1111")] +# for p in table.where("1000"<=table.cols.var1<="1010")] + rowselected += len(results) + elif atom == "bool": + for i in range(niter): + results = [p["var2"] # p.nrow() + for p in table.where(table.cols.var4 == 0)] + rowselected += len(results) + elif atom == "int": + for i in range(niter): + #results = [table.row["var3"] for i in table.where(2+i<=table.cols.var2 < 10+i)] + #results = [table.row.nrow() for i in table.where(2<=table.cols.var2 < 10)] + results = [p["var2"] # p.nrow() + # for p in table.where(110*i<=table.cols.var2<110*(i+1))] + # for p in table.where(1000-30", positions) + print("Total iterations in search:", niter) + + rowsread += table.nrows + uncomprBytes += idxcol.sorted.chunksize * niter * idxcol.sorted.itemsize + + results = table.read(coords=positions) + print("results length:", len(results)) + if verbose: + print("Values that fullfill the conditions:") + print(results) + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, uncomprBytes, niter) + + +if __name__ == "__main__": + import sys + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-v] [-p] [-R range] [-r] [-w] [-s recsize ] [-a + atom] [-c level] [-l complib] [-S] [-F] [-i item] [-n nrows] [-x] + [-k niter] file + -v verbose + -p use "psyco" if available + -R select a range in a field in the form "start,stop,step" + -r only read test + -w only write test + -s record size + -a use [float], [int], [bool] or [string] atom + -c sets a compression level (do not set it or 0 for no compression) + -S activate shuffling filter + -F activate fletcher32 filter + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -i item to search + -n set the number of rows in tables + -x don't make indexes + -k number of iterations for reading\n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpSFR:rwxk:s:a:c:l:i:n:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + rng = None + item = None + atom = "int" + fieldName = None + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + shuffle = 0 + fletcher32 = 0 + complib = "zlib" + nrows = 100 + recsize = "small" + index = 1 + niter = 1 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + if option[0] == '-p': + usepsyco = 1 + if option[0] == '-S': + shuffle = 1 + if option[0] == '-F': + fletcher32 = 1 + elif option[0] == '-R': + rng = [int(i) for i in option[1].split(",")] + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-x': + index = 0 + elif option[0] == '-s': + recsize = option[1] + elif 
option[0] == '-a': + atom = option[1] + if atom not in ["float", "int", "bool", "string"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == '-c': + complevel = int(option[1]) + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-i': + item = eval(option[1]) + elif option[0] == '-n': + nrows = int(option[1]) + elif option[0] == '-k': + niter = int(option[1]) + + # Build the Filters instance + filters = Filters(complevel=complevel, complib=complib, + shuffle=shuffle, fletcher32=fletcher32) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + if testwrite: + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if shuffle: + print("Suffling...") + t1 = time.time() + cpu1 = time.clock() + if psyco_imported and usepsyco: + psyco.bind(createFile) + (rowsw, rowsz) = createFile(file, nrows, filters, + atom, recsize, index, verbose) + t2 = time.time() + cpu2 = time.clock() + tapprows = round(t2 - t1, 3) + cpuapprows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpuapprows / tapprows, 2) * 100) + print("Rows written:", rowsw, " Row size:", rowsz) + print("Time writing rows: %s s (real) %s s (cpu) %s%%" % + (tapprows, cpuapprows, tpercent)) + print("Write rows/sec: ", int(rowsw / float(tapprows))) + print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))) + + if testread: + if psyco_imported and usepsyco: + psyco.bind(readFile) + psyco.bind(searchFile) + t1 = time.time() + cpu1 = time.clock() + if rng or item: + (rowsr, uncomprB, niter) = searchFile(file, atom, verbose, item) + else: + for i in range(1): + (rowsr, rowsel, rowsz) = readFile(file, atom, niter, verbose) + t2 = time.time() + cpu2 = time.clock() + treadrows = round(t2 - t1, 3) + cpureadrows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpureadrows / treadrows, 2) * 100) + tMrows = rowsr / (1000 * 1000.) + sKrows = rowsel / 1000. 
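+        # Reporting note: rowsr is scaled to millions (tMrows) and rowsel
+        # to thousands (sKrows) so the printed figures stay comparable
+        # across table sizes; treadrows/cpureadrows above are wall-clock
+        # and CPU seconds for the whole read phase.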
+ print("Rows read:", rowsr, "Mread:", round(tMrows, 3), "Mrows") + print("Rows selected:", rowsel, "Ksel:", round(sKrows, 3), "Krows") + print("Time reading rows: %s s (real) %s s (cpu) %s%%" % + (treadrows, cpureadrows, tpercent)) + print("Read Mrows/sec: ", round(tMrows / float(treadrows), 3)) + # print "Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)) +# print "Uncompr MB :", int(uncomprB / (1024 * 1024)) +# print "Uncompr MB/s :", int(uncomprB / (treadrows * 1024 * 1024)) +# print "Total chunks uncompr :", int(niter) diff --git a/bench/searchsorted-bench2.py b/bench/searchsorted-bench2.py new file mode 100644 index 0000000..f8446ae --- /dev/null +++ b/bench/searchsorted-bench2.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python + +from __future__ import print_function +import time +from tables import * + + +class Small(IsDescription): + var1 = StringCol(itemsize=4) + var2 = Int32Col() + var3 = Float64Col() + var4 = BoolCol() + +# Define a user record to characterize some kind of particles + + +class Medium(IsDescription): + var1 = StringCol(itemsize=16, dflt="") # 16-character String + #float1 = Float64Col(dflt=2.3) + #float2 = Float64Col(dflt=2.3) + # zADCcount = Int16Col() # signed short integer + var2 = Int32Col() # signed short integer + var3 = Float64Col() + grid_i = Int32Col() # integer + grid_j = Int32Col() # integer + pressure = Float32Col() # float (single-precision) + energy = Float64Col(shape=2) # double (double-precision) + + +def createFile(filename, nrows, filters, atom, recsize, index, verbose): + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="Searchsorted Benchmark", + filters=filters) + title = "This is the IndexArray title" + # Create an IndexArray instance + rowswritten = 0 + # Create an entry + klass = {"small": Small, "medium": Medium} + table = fileh.create_table(fileh.root, 'table', klass[recsize], title, + None, nrows) + for i in range(nrows): + #table.row['var1'] = str(i) + #table.row['var2'] = random.randrange(nrows) + table.row['var2'] = i + table.row['var3'] = i + #table.row['var4'] = i % 2 + table.row['var4'] = i > 2 + table.row.append() + rowswritten += nrows + table.flush() + rowsize = table.rowsize + indexrows = 0 + + # Index one entry: + if index: + if atom == "string": + indexrows = table.cols.var1.create_index() + elif atom == "bool": + indexrows = table.cols.var4.create_index() + elif atom == "int": + indexrows = table.cols.var2.create_index() + elif atom == "float": + indexrows = table.cols.var3.create_index() + else: + raise ValueError("Index type not supported yet") + if verbose: + print("Number of indexed rows:", indexrows) + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowswritten, rowsize) + + +def readFile(filename, atom, niter, verbose): + # Open the HDF5 file in read-only mode + + fileh = open_file(filename, mode="r") + table = fileh.root.table + print("reading", table) + if atom == "string": + idxcol = table.cols.var1.index + elif atom == "bool": + idxcol = table.cols.var4.index + elif atom == "int": + idxcol = table.cols.var2.index + else: + idxcol = table.cols.var3.index + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + print("Chunk size:", idxcol.sorted.chunksize) + print("Number of elements per slice:", idxcol.nelemslice) + print("Slice number in", table._v_pathname, ":", idxcol.nrows) + + rowselected = 0 + 
if atom == "string": + for i in range(niter): + #results = [table.row["var3"] for i in table(where=2+i<=table.cols.var2 < 10+i)] + #results = [table.row.nrow() for i in table(where=2<=table.cols.var2 < 10)] + results = [p["var1"] # p.nrow() + for p in table(where=table.cols.var1 == "1111")] +# for p in table(where="1000"<=table.cols.var1<="1010")] + rowselected += len(results) + elif atom == "bool": + for i in range(niter): + results = [p["var2"] # p.nrow() + for p in table(where=table.cols.var4 == 0)] + rowselected += len(results) + elif atom == "int": + for i in range(niter): + #results = [table.row["var3"] for i in table(where=2+i<=table.cols.var2 < 10+i)] + #results = [table.row.nrow() for i in table(where=2<=table.cols.var2 < 10)] + results = [p["var2"] # p.nrow() + # for p in table(where=110*i<=table.cols.var2<110*(i+1))] + # for p in table(where=1000-30", positions) + print("Total iterations in search:", niter) + + rowsread += table.nrows + uncomprBytes += idxcol.sorted.chunksize * niter * idxcol.sorted.itemsize + + results = table.read(coords=positions) + print("results length:", len(results)) + if verbose: + print("Values that fullfill the conditions:") + print(results) + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, uncomprBytes, niter) + + +if __name__ == "__main__": + import sys + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-v] [-p] [-R range] [-r] [-w] [-s recsize ] [-a + atom] [-c level] [-l complib] [-S] [-F] [-i item] [-n nrows] [-x] + [-k niter] file + -v verbose + -p use "psyco" if available + -R select a range in a field in the form "start,stop,step" + -r only read test + -w only write test + -s record size + -a use [float], [int], [bool] or [string] atom + -c sets a compression level (do not set it or 0 for no compression) + -S activate shuffling filter + -F activate fletcher32 filter + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -i item to search + -n set the number of rows in tables + -x don't make indexes + -k number of iterations for reading\n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpSFR:rwxk:s:a:c:l:i:n:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + rng = None + item = None + atom = "int" + fieldName = None + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + shuffle = 0 + fletcher32 = 0 + complib = "zlib" + nrows = 100 + recsize = "small" + index = 1 + niter = 1 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + if option[0] == '-p': + usepsyco = 1 + if option[0] == '-S': + shuffle = 1 + if option[0] == '-F': + fletcher32 = 1 + elif option[0] == '-R': + rng = [int(i) for i in option[1].split(",")] + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-x': + index = 0 + elif option[0] == '-s': + recsize = option[1] + elif option[0] == '-a': + atom = option[1] + if atom not in ["float", "int", "bool", "string"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == '-c': + complevel = int(option[1]) + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-i': + item = eval(option[1]) + elif option[0] == '-n': + nrows = int(option[1]) + elif option[0] == '-k': + niter = int(option[1]) + + # Build the Filters instance + filters 
= Filters(complevel=complevel, complib=complib, + shuffle=shuffle, fletcher32=fletcher32) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + if testwrite: + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if shuffle: + print("Suffling...") + t1 = time.time() + cpu1 = time.clock() + if psyco_imported and usepsyco: + psyco.bind(createFile) + (rowsw, rowsz) = createFile(file, nrows, filters, + atom, recsize, index, verbose) + t2 = time.time() + cpu2 = time.clock() + tapprows = round(t2 - t1, 3) + cpuapprows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpuapprows / tapprows, 2) * 100) + print("Rows written:", rowsw, " Row size:", rowsz) + print("Time writing rows: %s s (real) %s s (cpu) %s%%" % + (tapprows, cpuapprows, tpercent)) + print("Write rows/sec: ", int(rowsw / float(tapprows))) + print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))) + + if testread: + if psyco_imported and usepsyco: + psyco.bind(readFile) + psyco.bind(searchFile) + t1 = time.time() + cpu1 = time.clock() + if rng or item: + (rowsr, uncomprB, niter) = searchFile(file, atom, verbose, item) + else: + for i in range(1): + (rowsr, rowsel, rowsz) = readFile(file, atom, niter, verbose) + t2 = time.time() + cpu2 = time.clock() + treadrows = round(t2 - t1, 3) + cpureadrows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpureadrows / treadrows, 2) * 100) + tMrows = rowsr / (1000 * 1000.) + sKrows = rowsel / 1000. + print("Rows read:", rowsr, "Mread:", round(tMrows, 3), "Mrows") + print("Rows selected:", rowsel, "Ksel:", round(sKrows, 3), "Krows") + print("Time reading rows: %s s (real) %s s (cpu) %s%%" % + (treadrows, cpureadrows, tpercent)) + print("Read Mrows/sec: ", round(tMrows / float(treadrows), 3)) + # print "Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)) +# print "Uncompr MB :", int(uncomprB / (1024 * 1024)) +# print "Uncompr MB/s :", int(uncomprB / (treadrows * 1024 * 1024)) +# print "Total chunks uncompr :", int(niter) diff --git a/bench/shelve-bench.py b/bench/shelve-bench.py new file mode 100644 index 0000000..4a891c0 --- /dev/null +++ b/bench/shelve-bench.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python + +from __future__ import print_function +from tables import * +import numpy as NA +import struct +import sys +import shelve +import psyco + +# This class is accessible only for the examples + + +class Small(IsDescription): + + """Record descriptor. + + A record has several columns. They are represented here as class + attributes, whose names are the column names and their values will + become their types. The IsDescription class will take care the user + will not add any new variables and that its type is correct. 
+ + """ + + var1 = StringCol(itemsize=4) + var2 = Int32Col() + var3 = Float64Col() + +# Define a user record to characterize some kind of particles + + +class Medium(IsDescription): + name = StringCol(itemsize=16) # 16-character String + float1 = Float64Col(shape=2, dflt=2.3) + #float1 = Float64Col(dflt=1.3) + #float2 = Float64Col(dflt=2.3) + ADCcount = Int16Col() # signed short integer + grid_i = Int32Col() # integer + grid_j = Int32Col() # integer + pressure = Float32Col() # float (single-precision) + energy = Float64Col() # double (double-precision) + +# Define a user record to characterize some kind of particles + + +class Big(IsDescription): + name = StringCol(itemsize=16) # 16-character String + #float1 = Float64Col(shape=32, dflt=NA.arange(32)) + #float2 = Float64Col(shape=32, dflt=NA.arange(32)) + float1 = Float64Col(shape=32, dflt=range(32)) + float2 = Float64Col(shape=32, dflt=[2.2] * 32) + ADCcount = Int16Col() # signed short integer + grid_i = Int32Col() # integer + grid_j = Int32Col() # integer + pressure = Float32Col() # float (single-precision) + energy = Float64Col() # double (double-precision) + + +def createFile(filename, totalrows, recsize): + + # Open a 'n'ew file + fileh = shelve.open(filename, flag="n") + + rowswritten = 0 + # Get the record object associated with the new table + if recsize == "big": + d = Big() + arr = NA.array(NA.arange(32), type=NA.Float64) + arr2 = NA.array(NA.arange(32), type=NA.Float64) + elif recsize == "medium": + d = Medium() + else: + d = Small() + # print d + # sys.exit(0) + for j in range(3): + # Create a table + # table = fileh.create_table(group, 'tuple'+str(j), Record(), title, + # compress = 6, expectedrows = totalrows) + # Create a Table instance + tablename = 'tuple' + str(j) + table = [] + # Fill the table + if recsize == "big" or recsize == "medium": + for i in range(totalrows): + d.name = 'Particle: %6d' % (i) + #d.TDCcount = i % 256 + d.ADCcount = (i * 256) % (1 << 16) + if recsize == "big": + #d.float1 = NA.array([i]*32, NA.Float64) + #d.float2 = NA.array([i**2]*32, NA.Float64) + arr[0] = 1.1 + d.float1 = arr + arr2[0] = 2.2 + d.float2 = arr2 + pass + else: + d.float1 = NA.array([i ** 2] * 2, NA.Float64) + #d.float1 = float(i) + #d.float2 = float(i) + d.grid_i = i + d.grid_j = 10 - i + d.pressure = float(i * i) + d.energy = float(d.pressure ** 4) + table.append((d.ADCcount, d.energy, d.float1, d.float2, + d.grid_i, d.grid_j, d.name, d.pressure)) + # Only on float case + # table.append((d.ADCcount, d.energy, d.float1, + # d.grid_i, d.grid_j, d.name, d.pressure)) + else: + for i in range(totalrows): + d.var1 = str(i) + d.var2 = i + d.var3 = 12.1e10 + table.append((d.var1, d.var2, d.var3)) + + # Save this table on disk + fileh[tablename] = table + rowswritten += totalrows + + # Close the file + fileh.close() + return (rowswritten, struct.calcsize(d._v_fmt)) + + +def readFile(filename, recsize): + # Open the HDF5 file in read-only mode + fileh = shelve.open(filename, "r") + for table in ['tuple0', 'tuple1', 'tuple2']: + if recsize == "big" or recsize == "medium": + e = [t[2] for t in fileh[table] if t[4] < 20] + # if there is only one float (array) + #e = [ t[1] for t in fileh[table] if t[3] < 20 ] + else: + e = [t[1] for t in fileh[table] if t[1] < 20] + + print("resulting selection list ==>", e) + print("Total selected records ==> ", len(e)) + + # Close the file (eventually destroy the extended type) + fileh.close() + + +# Add code to test here +if __name__ == "__main__": + import getopt + import time + + usage = """usage: %s [-f] 
[-s recsize] [-i iterations] file
+    -s use [big] record, [medium] or [small]
+    -i sets the number of rows in each table\n""" % sys.argv[0]

+    try:
+        opts, pargs = getopt.getopt(sys.argv[1:], 's:fi:')
+    except getopt.GetoptError:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # if we pass too many parameters, abort
+    if len(pargs) != 1:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # default options
+    recsize = "medium"
+    iterations = 100
+
+    # Get the options
+    for option in opts:
+        if option[0] == '-s':
+            recsize = option[1]
+            if recsize not in ["big", "medium", "small"]:
+                sys.stderr.write(usage)
+                sys.exit(0)
+        elif option[0] == '-i':
+            iterations = int(option[1])
+
+    # Catch the hdf5 file passed as the last argument
+    file = pargs[0]
+
+    t1 = time.clock()
+    psyco.bind(createFile)
+    (rowsw, rowsz) = createFile(file, iterations, recsize)
+    t2 = time.clock()
+    tapprows = round(t2 - t1, 3)
+
+    t1 = time.clock()
+    psyco.bind(readFile)
+    readFile(file, recsize)
+    t2 = time.clock()
+    treadrows = round(t2 - t1, 3)
+
+    print("Rows written:", rowsw, " Row size:", rowsz)
+    print("Time appending rows:", tapprows)
+    print("Write rows/sec: ", int(iterations * 3 / float(tapprows)))
+    print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024)))
+    print("Time reading rows:", treadrows)
+    print("Read rows/sec: ", int(iterations * 3 / float(treadrows)))
+    print("Read KB/s :", int(rowsw * rowsz / (treadrows * 1024)))
diff --git a/bench/split-file.py b/bench/split-file.py
new file mode 100644
index 0000000..4653441
--- /dev/null
+++ b/bench/split-file.py
@@ -0,0 +1,40 @@
+"""
+Split out a monolithic file with many different runs of
+indexed_search.py.  The resulting files are meant for use in
+get-figures.py.
+
+Usage: python split-file.py prefix filename
+"""
+
+import sys
+
+prefix = sys.argv[1]
+filename = sys.argv[2]
+f = open(filename)
+sf = None
+for line in f:
+    if line.startswith('Processing database:'):
+        if sf:
+            sf.close()
+        line2 = line.split(':')[1]
+        # Check if the entry is compressed and if it has to be processed
+        line2 = line2[:line2.rfind('.')]
+        params = line2.split('-')
+        optlevel = 0
+        complib = None
+        for param in params:
+            if param[0] == 'O' and param[1].isdigit():
+                optlevel = int(param[1])
+            elif param[:-1] in ('zlib', 'lzo'):
+                complib = param
+        if 'PyTables' in prefix:
+            if complib:
+                sfilename = "%s-O%s-%s.out" % (prefix, optlevel, complib)
+            else:
+                sfilename = "%s-O%s.out" % (prefix, optlevel,)
+        else:
+            sfilename = "%s.out" % (prefix,)
+        sf = open(sfilename, 'a')
+    if sf:
+        sf.write(line)
+f.close()
diff --git a/bench/sqlite-search-bench-rnd.sh b/bench/sqlite-search-bench-rnd.sh
new file mode 100755
index 0000000..629d0e4
--- /dev/null
+++ b/bench/sqlite-search-bench-rnd.sh
@@ -0,0 +1,105 @@
+#!/bin/sh
+# I don't know why, but the /usr/bin/python2.3 from Debian is about 30% slower
+# than my own compiled version!
+python="/usr/local/bin/python2.3 -O"
+
+writedata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+    cmd="${python} sqlite-search-bench.py -R -h -b ${bfile} ${psyco} -m ${smode} -w -n ${nrows} data.nobackup/sqlite-${nrows}k.h5"
+    echo ${cmd}
+    ${cmd}
+}
+
+readdata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+
+    if [ "$smode" = "indexed" ]; then
+        #repeats=100
+        repeats=20
+    else
+        repeats=2
+    fi
+    cmd="${python} sqlite-search-bench.py -R -h -b ${bfile} ${psyco} -n ${nrows} -m ${smode} -r -k ${repeats} data.nobackup/sqlite-${nrows}k.h5"
+    echo ${cmd}
+    ${cmd}
+    # Finally, delete the source (if desired)
+    if [ "$smode" = "indexed" ]; then
+        echo "Deleting data file data.nobackup/sqlite-${nrows}k.h5"
+#        rm -f data.nobackup/sqlite-${nrows}k.h5
+    fi
+    return
+}
+
+overwrite=0
+if [ $# -ge 1 ]; then
+    if [ "$1" = "-o" ]; then
+        overwrite=1
+    fi
+fi
+if [ $# -ge 2 ]; then
+    psyco=$2
+fi
+
+# The name of the data bench file
+bfile="sqlite-dbench.h5"
+
+# Move out a possible previous benchmark file
+bn=`basename $bfile ".h5"`
+mv -f ${bn}-bck2.h5 ${bn}-bck3.h5
+mv -f ${bn}-bck.h5 ${bn}-bck2.h5
+if [ "$overwrite" = "1" ]; then
+    echo "moving ${bn}.h5 to ${bn}-bck.h5"
+    mv -f ${bn}.h5 ${bn}-bck.h5
+else
+    echo "copying ${bn}.h5 to ${bn}-bck.h5"
+    cp -f ${bn}.h5 ${bn}-bck.h5
+fi
+
+# Configuration for testing
+nrowsliststd="1 2"
+nrowslistidx="1 2"
+#nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000"
+#nrowsliststd="1 2 5 10 20"
+#nrowslistidx="1 2 5 10 20"
+# nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+# nrowslistidx="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+#nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000 100000"
+#nrowslistidx="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000 100000"
+
+for smode in standard indexed; do
+#for smode in indexed; do
+    echo
+    echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    echo "Entering ${smode} mode..."
+ echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo + if [ "$smode" = "standard" ]; then + nrowslist=$nrowsliststd + else + nrowslist=$nrowslistidx + fi + # Write data files + for nrows in $nrowslist; do + echo "*************************************************************" + echo "Writing for nrows=$nrows Krows, $smode, psyco=$psyco" + echo "*************************************************************" + writedata ${nrows} ${bfile} "${smode}" "${psyco}" + done + # Read data files + ${python} cacheout.py + for nrows in $nrowslist; do + echo "***********************************************************" + echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco" + echo "***********************************************************" + readdata ${nrows} ${bfile} "${smode}" "${psyco}" + done +done + +echo "New data available on: $bfile" +exit 0 diff --git a/bench/sqlite-search-bench.py b/bench/sqlite-search-bench.py new file mode 100644 index 0000000..995fe0e --- /dev/null +++ b/bench/sqlite-search-bench.py @@ -0,0 +1,455 @@ +#!/usr/bin/python + +from __future__ import print_function +import sqlite +import random +import time +import sys +import os +import os.path +from tables import * +import numpy as np + +randomvalues = 0 +standarddeviation = 10000 +# Initialize the random generator always with the same integer +# in order to have reproductible results +random.seed(19) +np.random.seed((19, 20)) + +# defaults +psycon = 0 +worst = 0 + + +def createNewBenchFile(bfile, verbose): + + class Create(IsDescription): + nrows = Int32Col(pos=0) + irows = Int32Col(pos=1) + tfill = Float64Col(pos=2) + tidx = Float64Col(pos=3) + tcfill = Float64Col(pos=4) + tcidx = Float64Col(pos=5) + rowsecf = Float64Col(pos=6) + rowseci = Float64Col(pos=7) + fsize = Float64Col(pos=8) + isize = Float64Col(pos=9) + psyco = BoolCol(pos=10) + + class Search(IsDescription): + nrows = Int32Col(pos=0) + rowsel = Int32Col(pos=1) + time1 = Float64Col(pos=2) + time2 = Float64Col(pos=3) + tcpu1 = Float64Col(pos=4) + tcpu2 = Float64Col(pos=5) + rowsec1 = Float64Col(pos=6) + rowsec2 = Float64Col(pos=7) + psyco = BoolCol(pos=8) + + if verbose: + print("Creating a new benchfile:", bfile) + # Open the benchmarking file + bf = open_file(bfile, "w") + # Create groups + for recsize in ["sqlite_small"]: + group = bf.create_group("/", recsize, recsize + " Group") + # Attach the row size of table as attribute + if recsize == "small": + group._v_attrs.rowsize = 16 + # Create a Table for writing bench + bf.create_table(group, "create_indexed", Create, "indexed values") + bf.create_table(group, "create_standard", Create, "standard values") + # create a group for searching bench + groupS = bf.create_group(group, "search", "Search Group") + # Create Tables for searching + for mode in ["indexed", "standard"]: + group = bf.create_group(groupS, mode, mode + " Group") + # for searching bench + # for atom in ["string", "int", "float", "bool"]: + for atom in ["string", "int", "float"]: + bf.create_table(group, atom, Search, atom + " bench") + bf.close() + + +def createFile(filename, nrows, filters, indexmode, heavy, noise, bfile, + verbose): + + # Initialize some variables + t1 = 0. + t2 = 0. + tcpu1 = 0. + tcpu2 = 0. + rowsecf = 0. + rowseci = 0. + size1 = 0. + size2 = 0. 
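+    # NOTE: from this point on, the function talks to SQLite through the
+    # module-level global "dbfile" (set in __main__ below), rather than
+    # through its "filename" argument, so it is only safe to call from
+    # this script.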
+ + if indexmode == "standard": + print("Creating a new database:", dbfile) + instd = os.popen("/usr/local/bin/sqlite " + dbfile, "w") + CREATESTD = """ +CREATE TABLE small ( +-- Name Type -- Example +--------------------------------------- +recnum INTEGER PRIMARY KEY, -- 345 +var1 char(4), -- Abronia villosa +var2 INTEGER, -- 111 +var3 FLOAT -- 12.32 +); +""" + CREATEIDX = """ +CREATE TABLE small ( +-- Name Type -- Example +--------------------------------------- +recnum INTEGER PRIMARY KEY, -- 345 +var1 char(4), -- Abronia villosa +var2 INTEGER, -- 111 +var3 FLOAT -- 12.32 +); +CREATE INDEX ivar1 ON small(var1); +CREATE INDEX ivar2 ON small(var2); +CREATE INDEX ivar3 ON small(var3); +""" + # Creating the table first and indexing afterwards is a bit faster + instd.write(CREATESTD) + instd.close() + + conn = sqlite.connect(dbfile) + cursor = conn.cursor() + if indexmode == "standard": + place_holders = ",".join(['%s'] * 3) + # Insert rows + SQL = "insert into small values(NULL, %s)" % place_holders + time1 = time.time() + cpu1 = time.clock() + # This way of filling is to copy the PyTables benchmark + nrowsbuf = 1000 + minimum = 0 + maximum = nrows + for i in range(0, nrows, nrowsbuf): + if i + nrowsbuf > nrows: + j = nrows + else: + j = i + nrowsbuf + if randomvalues: + var3 = np.random.uniform(minimum, maximum, shape=[j - i]) + else: + var3 = np.arange(i, j, type=np.Float64) + if noise: + var3 += np.random.uniform(-3, 3, shape=[j - i]) + var2 = np.array(var3, type=np.Int32) + var1 = np.array(None, shape=[j - i], dtype='s4') + if not heavy: + for n in range(j - i): + var1[n] = str("%.4s" % var2[n]) + for n in range(j - i): + fields = (var1[n], var2[n], var3[n]) + cursor.execute(SQL, fields) + conn.commit() + t1 = round(time.time() - time1, 5) + tcpu1 = round(time.clock() - cpu1, 5) + rowsecf = nrows / t1 + size1 = os.stat(dbfile)[6] + print("******** Results for writing nrows = %s" % (nrows), "*********") + print(("Insert time:", t1, ", KRows/s:", + round((nrows / 10. ** 3) / t1, 3),)) + print(", File size:", round(size1 / (1024. * 1024.), 3), "MB") + + # Indexem + if indexmode == "indexed": + time1 = time.time() + cpu1 = time.clock() + if not heavy: + cursor.execute("CREATE INDEX ivar1 ON small(var1)") + conn.commit() + cursor.execute("CREATE INDEX ivar2 ON small(var2)") + conn.commit() + cursor.execute("CREATE INDEX ivar3 ON small(var3)") + conn.commit() + t2 = round(time.time() - time1, 5) + tcpu2 = round(time.clock() - cpu1, 5) + rowseci = nrows / t2 + print(("Index time:", t2, ", IKRows/s:", + round((nrows / 10. ** 3) / t2, 3),)) + size2 = os.stat(dbfile)[6] - size1 + print((", Final size with index:", + round(size2 / (1024. * 1024), 3), "MB")) + + conn.close() + + # Collect benchmark data + bf = open_file(bfile, "a") + recsize = "sqlite_small" + if indexmode == "indexed": + table = bf.get_node("/" + recsize + "/create_indexed") + else: + table = bf.get_node("/" + recsize + "/create_standard") + table.row["nrows"] = nrows + table.row["irows"] = nrows + table.row["tfill"] = t1 + table.row["tidx"] = t2 + table.row["tcfill"] = tcpu1 + table.row["tcidx"] = tcpu2 + table.row["psyco"] = psycon + table.row["rowsecf"] = rowsecf + table.row["rowseci"] = rowseci + table.row["fsize"] = size1 + table.row["isize"] = size2 + table.row.append() + bf.close() + + return + + +def readFile(dbfile, nrows, indexmode, heavy, dselect, bfile, riter): + # Connect to the database. 
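+    # (This appears to be the legacy pysqlite 1.x interface, where
+    # connect() takes "db" and "mode" keyword arguments; the modern
+    # standard-library sqlite3 module does not accept these.)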
+ conn = sqlite.connect(db=dbfile, mode=755) + # Obtain a cursor + cursor = conn.cursor() + + # select count(*), avg(var2) + SQL1 = """ + select recnum + from small where var1 = %s + """ + SQL2 = """ + select recnum + from small where var2 >= %s and var2 < %s + """ + SQL3 = """ + select recnum + from small where var3 >= %s and var3 < %s + """ + + # Open the benchmark database + bf = open_file(bfile, "a") + # default values for the case that columns are not indexed + t2 = 0. + tcpu2 = 0. + # Some previous computations for the case of random values + if randomvalues: + # algorithm to choose a value separated from mean +# If want to select fewer values, select this +# if nrows/2 > standarddeviation*3: +# Choose five standard deviations away from mean value +# dev = standarddeviation*5 +# dev = standarddeviation*math.log10(nrows/1000.) + + # This algorithm give place to too asymmetric result values +# if standarddeviation*10 < nrows/2: +# Choose four standard deviations away from mean value +# dev = standarddeviation*4 +# else: +# dev = 100 + # Yet Another Algorithm + if nrows / 2 > standarddeviation * 10: + dev = standarddeviation * 4. + elif nrows / 2 > standarddeviation: + dev = standarddeviation * 2. + elif nrows / 2 > standarddeviation / 10.: + dev = standarddeviation / 10. + else: + dev = standarddeviation / 100. + + valmax = int(round((nrows / 2.) - dev)) + # split the selection range in regular chunks + if riter > valmax * 2: + riter = valmax * 2 + chunksize = (valmax * 2 / riter) * 10 + # Get a list of integers for the intervals + randlist = range(0, valmax, chunksize) + randlist.extend(range(nrows - valmax, nrows, chunksize)) + # expand the list ten times so as to use the cache + randlist = randlist * 10 + # shuffle the list + random.shuffle(randlist) + # reset the value of chunksize + chunksize = chunksize / 10 + # print "chunksize-->", chunksize + # randlist.sort();print "randlist-->", randlist + else: + chunksize = 3 + if heavy: + searchmodelist = ["int", "float"] + else: + searchmodelist = ["string", "int", "float"] + + # Execute queries + for atom in searchmodelist: + time2 = 0 + cpu2 = 0 + rowsel = 0 + for i in range(riter): + rnd = random.randrange(nrows) + time1 = time.time() + cpu1 = time.clock() + if atom == "string": + #cursor.execute(SQL1, "1111") + cursor.execute(SQL1, str(rnd)[-4:]) + elif atom == "int": + #cursor.execute(SQL2 % (rnd, rnd+3)) + cursor.execute(SQL2 % (rnd, rnd + dselect)) + elif atom == "float": + #cursor.execute(SQL3 % (float(rnd), float(rnd+3))) + cursor.execute(SQL3 % (float(rnd), float(rnd + dselect))) + else: + raise ValueError( + "atom must take a value in ['string','int','float']") + if i == 0: + t1 = time.time() - time1 + tcpu1 = time.clock() - cpu1 + else: + if indexmode == "indexed": + # if indexed, wait until the 5th iteration to take + # times (so as to insure that the index is + # effectively cached) + if i >= 5: + time2 += time.time() - time1 + cpu2 += time.clock() - cpu1 + else: + time2 += time.time() - time1 + time2 += time.clock() - cpu1 + if riter > 1: + if indexmode == "indexed" and riter >= 5: + correction = 5 + else: + correction = 1 + t2 = time2 / (riter - correction) + tcpu2 = cpu2 / (riter - correction) + + print(("*** Query results for atom = %s, nrows = %s, " + "indexmode = %s ***" % (atom, nrows, indexmode))) + print("Query time:", round(t1, 5), ", cached time:", round(t2, 5)) + print("MRows/s:", round((nrows / 10. ** 6) / t1, 3), end=' ') + if t2 > 0: + print(", cached MRows/s:", round((nrows / 10. 
** 6) / t2, 3)) + else: + print() + + # Collect benchmark data + recsize = "sqlite_small" + tablepath = "/" + recsize + "/search/" + indexmode + "/" + atom + table = bf.get_node(tablepath) + table.row["nrows"] = nrows + table.row["rowsel"] = rowsel + table.row["time1"] = t1 + table.row["time2"] = t2 + table.row["tcpu1"] = tcpu1 + table.row["tcpu2"] = tcpu2 + table.row["psyco"] = psycon + table.row["rowsec1"] = nrows / t1 + if t2 > 0: + table.row["rowsec2"] = nrows / t2 + table.row.append() + table.flush() # Flush the data + + # Close the database + conn.close() + bf.close() # the bench database + + return + +if __name__ == "__main__": + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-v] [-p] [-R] [-h] [-t] [-r] [-w] [-n nrows] [-b file] [-k riter] [-m indexmode] [-N range] datafile + -v verbose + -p use "psyco" if available + -R use Random values for filling + -h heavy mode (exclude strings from timings) + -t worsT searching case (to emulate PyTables worst cases) + -r only read test + -w only write test + -n the number of rows (in krows) + -b bench filename + -N introduce (uniform) noise within range into the values + -d the interval for look values (int, float) at. Default is 3. + -k number of iterations for reading\n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpRhtrwn:b:k:m:N:d:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + dselect = 3. + noise = 0. + verbose = 0 + heavy = 0 + testread = 1 + testwrite = 1 + usepsyco = 0 + nrows = 1000 + bfile = "sqlite-bench.h5" + supported_imodes = ["indexed", "standard"] + indexmode = "indexed" + riter = 2 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + if option[0] == '-p': + usepsyco = 1 + elif option[0] == '-R': + randomvalues = 1 + elif option[0] == '-h': + heavy = 1 + elif option[0] == '-t': + worst = 1 + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-b': + bfile = option[1] + elif option[0] == '-N': + noise = float(option[1]) + elif option[0] == '-m': + indexmode = option[1] + if indexmode not in supported_imodes: + raise ValueError( + "Indexmode should be any of '%s' and you passed '%s'" % + (supported_imodes, indexmode)) + elif option[0] == '-n': + nrows = int(float(option[1]) * 1000) + elif option[0] == '-d': + dselect = float(option[1]) + elif option[0] == '-k': + riter = int(option[1]) + + # remaining parameters + dbfile = pargs[0] + + if worst: + nrows -= 1 # the worst case + + # Create the benchfile (if needed) + if not os.path.exists(bfile): + createNewBenchFile(bfile, verbose) + + if testwrite: + if psyco_imported and usepsyco: + psyco.bind(createFile) + psycon = 1 + createFile(dbfile, nrows, None, indexmode, heavy, noise, bfile, + verbose) + + if testread: + if psyco_imported and usepsyco: + psyco.bind(readFile) + psycon = 1 + readFile(dbfile, nrows, indexmode, heavy, dselect, bfile, riter) diff --git a/bench/sqlite-search-bench.sh b/bench/sqlite-search-bench.sh new file mode 100755 index 0000000..4adca89 --- /dev/null +++ b/bench/sqlite-search-bench.sh @@ -0,0 +1,96 @@ +#!/bin/sh +# I don't know why, but the /usr/bin/python2.3 from Debian is a 30% slower +# than my own compiled version! 
2004-08-18
+python="/usr/local/bin/python2.3 -O"
+
+writedata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+    cmd="${python} sqlite-search-bench.py -b ${bfile} ${psyco} -m ${smode} -w -n ${nrows} data.nobackup/sqlite-${nrows}k-${smode}.h5"
+    echo ${cmd}
+    ${cmd}
+}
+
+readdata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+
+    if [ "$smode" = "indexed" ]; then
+        repeats=100
+    else
+        repeats=2
+    fi
+    cmd="${python} sqlite-search-bench.py -b ${bfile} ${psyco} -n ${nrows} -m ${smode} -r -k ${repeats} data.nobackup/sqlite-${nrows}k-${smode}.h5"
+    echo ${cmd}
+    ${cmd}
+    # Finally, delete the source (if desired)
+    #rm -f data.nobackup/sqlite-${nrows}k-${smode}.h5
+    return
+}
+
+overwrite=0
+if [ $# -ge 1 ]; then
+    if [ "$1" = "-o" ]; then
+        overwrite=1
+    fi
+fi
+if [ $# -ge 2 ]; then
+    psyco=$2
+fi
+
+# The name of the data bench file
+bfile="sqlite-dbench.h5"
+
+# Move out a possible previous benchmark file
+bn=`basename $bfile ".h5"`
+mv -f ${bn}-bck2.h5 ${bn}-bck3.h5
+mv -f ${bn}-bck.h5 ${bn}-bck2.h5
+if [ "$overwrite" = "1" ]; then
+    echo "moving ${bn}.h5 to ${bn}-bck.h5"
+    mv -f ${bn}.h5 ${bn}-bck.h5
+else
+    echo "copying ${bn}.h5 to ${bn}-bck.h5"
+    cp -f ${bn}.h5 ${bn}-bck.h5
+fi
+
+# Configuration for testing
+nrowsliststd="1 2 5 10 20 50"
+#nrowslistidx="1 2 5 10 20 50"
+#nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000"
+nrowslistidx="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+
+#for smode in standard indexed; do
+for smode in indexed; do
+    echo
+    echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    echo "Entering ${smode} mode..."
+    echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    echo
+    if [ "$smode" = "standard" ]; then
+        nrowslist=$nrowsliststd
+    else
+        nrowslist=$nrowslistidx
+    fi
+    # Write data files
+#    for nrows in $nrowslist; do
+#        echo "*************************************************************"
+#        echo "Writing for nrows=$nrows Krows, $smode, psyco=$psyco"
+#        echo "*************************************************************"
+#        writedata ${nrows} ${bfile} "${smode}" "${psyco}"
+#    done
+    # Read data files
+    ${python} cacheout.py
+    for nrows in $nrowslist; do
+        echo "***********************************************************"
+        echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco"
+        echo "***********************************************************"
+        readdata ${nrows} ${bfile} "${smode}" "${psyco}"
+    done
+done
+
+echo "New data available on: $bfile"
+exit 0
diff --git a/bench/sqlite3-search-bench.py b/bench/sqlite3-search-bench.py
new file mode 100644
index 0000000..60c418a
--- /dev/null
+++ b/bench/sqlite3-search-bench.py
@@ -0,0 +1,190 @@
+from __future__ import print_function
+import os
+import os.path
+from time import time
+import numpy
+import random
+
+# in order to always generate the same random sequence
+random.seed(19)
+
+
+def fill_arrays(start, stop):
+    # (Relies on the module-level "userandom" and "nrows" globals that are
+    # set in __main__ below.)
+    col_i = numpy.arange(start, stop, dtype=numpy.int32)
+    if userandom:
+        col_j = numpy.random.uniform(0, nrows, stop - start)
+    else:
+        col_j = numpy.array(col_i, dtype=numpy.float64)
+    return col_i, col_j
+
+# Generator to ensure compatibility with the PyTables benchmark
+
+
+def int_generator(nrows):
+    step = 1000 * 100
+    j = 0
+    for i in range(nrows):
+        if i >= step * j:
+            stop = (j + 1) * step
+            if stop > nrows:  # needed when nrows is not a multiple of step
+                stop = nrows
+            col_i, col_j = fill_arrays(i, stop)
+            j += 1
+            k = 0
+        yield (col_i[k], col_j[k])
+        k += 1
+
+
+def int_generator_slow(nrows):
+    for i in range(nrows):
+        if userandom:
+            yield (i,
float(random.randint(0, nrows))) + else: + yield (i, float(i)) + + +def open_db(filename, remove=0): + if remove and os.path.exists(filename): + os.remove(filename) + con = sqlite.connect(filename) + cur = con.cursor() + return con, cur + + +def create_db(filename, nrows): + con, cur = open_db(filename, remove=1) + cur.execute("create table ints(i integer, j real)") + t1 = time() + # This is twice as fast as a plain loop + cur.executemany("insert into ints(i,j) values (?,?)", int_generator(nrows)) + con.commit() + ctime = time() - t1 + if verbose: + print("insert time:", round(ctime, 5)) + print("Krows/s:", round((nrows / 1000.) / ctime, 5)) + close_db(con, cur) + + +def index_db(filename): + con, cur = open_db(filename) + t1 = time() + cur.execute("create index ij on ints(j)") + con.commit() + itime = time() - t1 + if verbose: + print("index time:", round(itime, 5)) + print("Krows/s:", round(nrows / itime, 5)) + # Close the DB + close_db(con, cur) + + +def query_db(filename, rng): + con, cur = open_db(filename) + t1 = time() + ntimes = 10 + for i in range(ntimes): + # between clause does not seem to take advantage of indexes + # cur.execute("select j from ints where j between %s and %s" % \ + cur.execute("select i from ints where j >= %s and j <= %s" % + # cur.execute("select i from ints where i >= %s and i <= + # %s" % + (rng[0] + i, rng[1] + i)) + results = cur.fetchall() + con.commit() + qtime = (time() - t1) / ntimes + if verbose: + print("query time:", round(qtime, 5)) + print("Mrows/s:", round((nrows / 1000.) / qtime, 5)) + print(results) + close_db(con, cur) + + +def close_db(con, cur): + cur.close() + con.close() + +if __name__ == "__main__": + import sys + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-v] [-p] [-m] [-i] [-q] [-c] [-R range] [-n nrows] file + -v verbose + -p use "psyco" if available + -m use random values to fill the table + -q do query + -c create the database + -i index the table + -2 use sqlite2 (default is use sqlite3) + -R select a range in a field in the form "start,stop" (def "0,10") + -n sets the number of rows (in krows) in each table + \n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpmiqc2R:n:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + usepsyco = 0 + userandom = 0 + docreate = 0 + createindex = 0 + doquery = 0 + sqlite_version = "3" + rng = [0, 10] + nrows = 1 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + elif option[0] == '-p': + usepsyco = 1 + elif option[0] == '-m': + userandom = 1 + elif option[0] == '-i': + createindex = 1 + elif option[0] == '-q': + doquery = 1 + elif option[0] == '-c': + docreate = 1 + elif option[0] == "-2": + sqlite_version = "2" + elif option[0] == '-R': + rng = [int(i) for i in option[1].split(",")] + elif option[0] == '-n': + nrows = int(option[1]) + + # Catch the hdf5 file passed as the last argument + filename = pargs[0] + + if sqlite_version == "2": + import sqlite + else: + from pysqlite2 import dbapi2 as sqlite + + if verbose: + print("pysqlite version:", sqlite.version) + if userandom: + print("using random values") + + if docreate: + if verbose: + print("writing %s krows" % nrows) + if psyco_imported and usepsyco: + psyco.bind(create_db) + nrows *= 1000 + create_db(filename, nrows) + + if createindex: + index_db(filename) + + if doquery: + query_db(filename, rng) diff --git a/bench/stress-test.py b/bench/stress-test.py new file mode 100644 
index 0000000..109e7a4 --- /dev/null +++ b/bench/stress-test.py @@ -0,0 +1,401 @@ +from __future__ import print_function +import gc +import sys +import time +#import types +import numpy +from tables import Group # , MetaIsDescription +from tables import * + + +class Test(IsDescription): + ngroup = Int32Col(pos=1) + ntable = Int32Col(pos=2) + nrow = Int32Col(pos=3) + #string = StringCol(itemsize=500, pos=4) + +TestDict = { + "ngroup": Int32Col(pos=1), + "ntable": Int32Col(pos=2), + "nrow": Int32Col(pos=3), +} + + +def createFileArr(filename, ngroups, ntables, nrows): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + fileh.create_group("/", 'group%04d' % k, "Group %d" % k) + + fileh.close() + + # Now, create the arrays + arr = numpy.arange(nrows) + for k in range(ngroups): + fileh = open_file(filename, mode="a", root_uep='group%04d' % k) + for j in range(ntables): + # Create the array + fileh.create_array("/", 'array%04d' % j, arr, "Array %d" % j) + fileh.close() + + return (ngroups * ntables * nrows, 4) + + +def readFileArr(filename, ngroups, recsize, verbose): + + rowsread = 0 + for ngroup in range(ngroups): + fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup) + # Get the group + group = fileh.root + narrai = 0 + if verbose: + print("Group ==>", group) + for arrai in fileh.list_nodes(group, 'Array'): + if verbose > 1: + print("Array ==>", arrai) + print("Rows in", arrai._v_pathname, ":", arrai.shape) + + arr = arrai.read() + + rowsread += len(arr) + narrai += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, 4, rowsread * 4) + + +def createFile(filename, ngroups, ntables, nrows, complevel, complib, recsize): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + group = fileh.create_group("/", 'group%04d' % k, "Group %d" % k) + + fileh.close() + + # Now, create the tables + rowswritten = 0 + if not ntables: + rowsize = 0 + + for k in range(ngroups): + print("Filling tables in group:", k) + fileh = open_file(filename, mode="a", root_uep='group%04d' % k) + # Get the group + group = fileh.root + for j in range(ntables): + # Create a table + # table = fileh.create_table(group, 'table%04d'% j, Test, + table = fileh.create_table(group, 'table%04d' % j, TestDict, + 'Table%04d' % j, + complevel, complib, nrows) + rowsize = table.rowsize + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(nrows): + row['ngroup'] = k + row['ntable'] = j + row['nrow'] = i + row.append() + + rowswritten += nrows + table.flush() + + # Close the file + fileh.close() + + return (rowswritten, rowsize) + + +def readFile(filename, ngroups, recsize, verbose): + # Open the HDF5 file in read-only mode + + rowsize = 0 + buffersize = 0 + rowsread = 0 + for ngroup in range(ngroups): + fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup) + # Get the group + group = fileh.root + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, 'Table'): + rowsize = table.rowsize + buffersize = table.rowsize * table.nrowsinbuf + if verbose > 1: + print("Table ==>", table) + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", 
table.rowsize * table.nrowsinbuf)
+                print("MaxTuples:", table.nrowsinbuf)
+
+            nrow = 0
+            if table.nrows > 0:  # only read if we have rows in tables
+                for row in table:
+                    try:
+                        assert row["ngroup"] == ngroup
+                        assert row["ntable"] == ntable
+                        assert row["nrow"] == nrow
+                    except:
+                        print("Error in group: %d, table: %d, row: %d" %
+                              (ngroup, ntable, nrow))
+                        print("Record ==>", row)
+                    nrow += 1
+
+            assert nrow == table.nrows
+            rowsread += table.nrows
+            ntable += 1
+
+        # Close the file (eventually destroy the extended type)
+        fileh.close()
+
+    return (rowsread, rowsize, buffersize)
+
+
+class TrackRefs:
+
+    """Object to track reference counts across test runs."""
+
+    def __init__(self, verbose=0):
+        self.type2count = {}
+        self.type2all = {}
+        self.verbose = verbose
+
+    def update(self, verbose=0):
+        # sys.getobjects() is only available in debug (--with-pydebug)
+        # builds of CPython.
+        obs = sys.getobjects(0)
+        type2count = {}
+        type2all = {}
+        for o in obs:
+            all = sys.getrefcount(o)
+            t = type(o)
+            if verbose:
+                # if t == types.TupleType:
+                if isinstance(o, Group):
+                    # if isinstance(o, MetaIsDescription):
+                    print("-->", o, "refs:", all)
+                    refrs = gc.get_referrers(o)
+                    trefrs = []
+                    for refr in refrs:
+                        trefrs.append(type(refr))
+                    print("Referrers -->", refrs)
+                    print("Referrers types -->", trefrs)
+            # if t == types.StringType: print "-->",o
+            if t in type2count:
+                type2count[t] += 1
+                type2all[t] += all
+            else:
+                type2count[t] = 1
+                type2all[t] = all
+
+        ct = sorted([(type2count[t] - self.type2count.get(t, 0),
+                      type2all[t] - self.type2all.get(t, 0),
+                      t)
+                     for t in type2count.keys()])
+        ct.reverse()
+        for delta1, delta2, t in ct:
+            if delta1 or delta2:
+                print("%-55s %8d %8d" % (t, delta1, delta2))
+
+        self.type2count = type2count
+        self.type2all = type2all
+
+
+def dump_refs(preheat=10, iter1=10, iter2=10, *testargs):
+
+    rc1 = rc2 = None
+    # testMethod()
+    for i in range(preheat):
+        testMethod(*testargs)
+    gc.collect()
+    rc1 = sys.gettotalrefcount()
+    track = TrackRefs()
+    for i in range(iter1):
+        testMethod(*testargs)
+    print("First output of TrackRefs:")
+    gc.collect()
+    rc2 = sys.gettotalrefcount()
+    track.update()
+    print("Inc refs in function testMethod --> %5d" % (rc2 - rc1),
+          file=sys.stderr)
+    for i in range(iter2):
+        testMethod(*testargs)
+    track.update(verbose=1)
+    print("Second output of TrackRefs:")
+    gc.collect()
+    rc3 = sys.gettotalrefcount()
+
+    print("Inc refs in function testMethod --> %5d" % (rc3 - rc2),
+          file=sys.stderr)
+
+
+def dump_garbage():
+    """Show us what the garbage is about."""
+    # Force collection
+    print("\nGARBAGE:")
+    gc.collect()
+
+    print("\nGARBAGE OBJECTS:")
+    for x in gc.garbage:
+        s = str(x)
+        # if len(s) > 80: s = s[:77] + "..."
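+        # gc.garbage holds objects that the collector found unreachable but
+        # could not free; on this vintage of CPython these are typically
+        # reference cycles involving objects with __del__ methods.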
+ print(type(x), "\n ", s) + + # print "\nTRACKED OBJECTS:" + # reportLoggedInstances("*") + + +def testMethod(file, usearray, testwrite, testread, complib, complevel, + ngroups, ntables, nrows): + + if complevel > 0: + print("Compression library:", complib) + if testwrite: + t1 = time.time() + cpu1 = time.clock() + if usearray: + (rowsw, rowsz) = createFileArr(file, ngroups, ntables, nrows) + else: + (rowsw, rowsz) = createFile(file, ngroups, ntables, nrows, + complevel, complib, recsize) + t2 = time.time() + cpu2 = time.clock() + tapprows = round(t2 - t1, 3) + cpuapprows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpuapprows / tapprows, 2) * 100) + print("Rows written:", rowsw, " Row size:", rowsz) + print("Time writing rows: %s s (real) %s s (cpu) %s%%" % + (tapprows, cpuapprows, tpercent)) + print("Write rows/sec: ", int(rowsw / float(tapprows))) + print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))) + + if testread: + t1 = time.time() + cpu1 = time.clock() + if usearray: + (rowsr, rowsz, bufsz) = readFileArr(file, + ngroups, recsize, verbose) + else: + (rowsr, rowsz, bufsz) = readFile(file, ngroups, recsize, verbose) + t2 = time.time() + cpu2 = time.clock() + treadrows = round(t2 - t1, 3) + cpureadrows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpureadrows / treadrows, 2) * 100) + print("Rows read:", rowsr, " Row size:", rowsz, "Buf size:", bufsz) + print("Time reading rows: %s s (real) %s s (cpu) %s%%" % + (treadrows, cpureadrows, tpercent)) + print("Read rows/sec: ", int(rowsr / float(treadrows))) + print("Read KB/s :", int(rowsr * rowsz / (treadrows * 1024))) + +if __name__ == "__main__": + import getopt + import profile + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file + -d debugging level + -v verbosity level + -p use "psyco" if available + -a use Array objects instead of Table + -r only read test + -w only write test + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -c sets a compression level (do not set it or 0 for no compression) + -g number of groups hanging from "/" + -t number of tables per group + -i number of rows per table +""" + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'd:v:parwl:c:g:t:i:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + ngroups = 5 + ntables = 5 + nrows = 100 + verbose = 0 + debug = 0 + recsize = "medium" + testread = 1 + testwrite = 1 + usepsyco = 0 + usearray = 0 + complevel = 0 + complib = "zlib" + + # Get the options + for option in opts: + if option[0] == '-d': + debug = int(option[1]) + if option[0] == '-v': + verbose = int(option[1]) + if option[0] == '-p': + usepsyco = 1 + if option[0] == '-a': + usearray = 1 + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-c': + complevel = int(option[1]) + elif option[0] == '-g': + ngroups = int(option[1]) + elif option[0] == '-t': + ntables = int(option[1]) + elif option[0] == '-i': + nrows = int(option[1]) + + if debug: + gc.enable() + + if debug == 1: + gc.set_debug(gc.DEBUG_LEAK) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + if psyco_imported and usepsyco: + psyco.bind(createFile) + psyco.bind(readFile) + + if debug == 2: + 
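+        # dump_refs() relies on sys.gettotalrefcount() and sys.getobjects(),
+        # which exist only in debug (--with-pydebug) builds of CPython; a
+        # regular interpreter would raise AttributeError here.
+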
dump_refs(10, 10, 15, file, usearray, testwrite, testread, complib, + complevel, ngroups, ntables, nrows) + else: +# testMethod(file, usearray, testwrite, testread, complib, complevel, +# ngroups, ntables, nrows) + profile.run("testMethod(file, usearray, testwrite, testread, " + + "complib, complevel, ngroups, ntables, nrows)") + + # Show the dirt + if debug == 1: + dump_garbage() diff --git a/bench/stress-test2.py b/bench/stress-test2.py new file mode 100644 index 0000000..1dc91fe --- /dev/null +++ b/bench/stress-test2.py @@ -0,0 +1,238 @@ +from __future__ import print_function +import gc +import sys +import time +import random +from tables import * + + +class Test(IsDescription): + ngroup = Int32Col(pos=1) + ntable = Int32Col(pos=2) + nrow = Int32Col(pos=3) + time = Float64Col(pos=5) + random = Float32Col(pos=4) + + +def createFile(filename, ngroups, ntables, nrows, complevel, complib, recsize): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + group = fileh.create_group("/", 'group%04d' % k, "Group %d" % k) + + fileh.close() + + # Now, create the tables + rowswritten = 0 + for k in range(ngroups): + fileh = open_file(filename, mode="a", root_uep='group%04d' % k) + # Get the group + group = fileh.root + for j in range(ntables): + # Create a table + table = fileh.create_table(group, 'table%04d' % j, Test, + 'Table%04d' % j, + complevel, complib, nrows) + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(nrows): + row['time'] = time.time() + row['random'] = random.random() * 40 + 100 + row['ngroup'] = k + row['ntable'] = j + row['nrow'] = i + row.append() + + rowswritten += nrows + table.flush() + + # Close the file + fileh.close() + + return (rowswritten, table.rowsize) + + +def readFile(filename, ngroups, recsize, verbose): + # Open the HDF5 file in read-only mode + + rowsread = 0 + for ngroup in range(ngroups): + fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup) + # Get the group + group = fileh.root + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, 'Table'): + rowsize = table.rowsize + buffersize = table.rowsize * table.nrowsinbuf + if verbose > 1: + print("Table ==>", table) + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + nrow = 0 + time_1 = 0.0 + for row in table: + try: + # print "row['ngroup'], ngroup ==>", row["ngroup"], ngroup + assert row["ngroup"] == ngroup + assert row["ntable"] == ntable + assert row["nrow"] == nrow + # print "row['time'], time_1 ==>", row["time"], time_1 + assert row["time"] >= (time_1 - 0.01) + #assert 100 <= row["random"] <= 139.999 + assert 100 <= row["random"] <= 140 + except: + print("Error in group: %d, table: %d, row: %d" % + (ngroup, ntable, nrow)) + print("Record ==>", row) + time_1 = row["time"] + nrow += 1 + + assert nrow == table.nrows + rowsread += table.nrows + ntable += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, rowsize, buffersize) + + +def dump_garbage(): + """show us waht the garbage is about.""" + # Force collection + print("\nGARBAGE:") + gc.collect() + + print("\nGARBAGE OBJECTS:") + for x in gc.garbage: + s = str(x) + #if len(s) > 80: s = s[:77] + "..." 
+ print(type(x), "\n ", s) + +if __name__ == "__main__": + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file + -d debugging level + -v verbosity level + -p use "psyco" if available + -r only read test + -w only write test + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -c sets a compression level (do not set it or 0 for no compression) + -g number of groups hanging from "/" + -t number of tables per group + -i number of rows per table +""" + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'd:v:prwl:c:g:t:i:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + ngroups = 5 + ntables = 5 + nrows = 100 + verbose = 0 + debug = 0 + recsize = "medium" + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + complib = "zlib" + + # Get the options + for option in opts: + if option[0] == '-d': + debug = int(option[1]) + if option[0] == '-v': + verbose = int(option[1]) + if option[0] == '-p': + usepsyco = 1 + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-c': + complevel = int(option[1]) + elif option[0] == '-g': + ngroups = int(option[1]) + elif option[0] == '-t': + ntables = int(option[1]) + elif option[0] == '-i': + nrows = int(option[1]) + + if debug: + gc.enable() + gc.set_debug(gc.DEBUG_LEAK) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if testwrite: + t1 = time.time() + cpu1 = time.clock() + if psyco_imported and usepsyco: + psyco.bind(createFile) + (rowsw, rowsz) = createFile(file, ngroups, ntables, nrows, + complevel, complib, recsize) + t2 = time.time() + cpu2 = time.clock() + tapprows = round(t2 - t1, 3) + cpuapprows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpuapprows / tapprows, 2) * 100) + print("Rows written:", rowsw, " Row size:", rowsz) + print("Time writing rows: %s s (real) %s s (cpu) %s%%" % + (tapprows, cpuapprows, tpercent)) + print("Write rows/sec: ", int(rowsw / float(tapprows))) + print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))) + + if testread: + t1 = time.time() + cpu1 = time.clock() + if psyco_imported and usepsyco: + psyco.bind(readFile) + (rowsr, rowsz, bufsz) = readFile(file, ngroups, recsize, verbose) + t2 = time.time() + cpu2 = time.clock() + treadrows = round(t2 - t1, 3) + cpureadrows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpureadrows / treadrows, 2) * 100) + print("Rows read:", rowsr, " Row size:", rowsz, "Buf size:", bufsz) + print("Time reading rows: %s s (real) %s s (cpu) %s%%" % + (treadrows, cpureadrows, tpercent)) + print("Read rows/sec: ", int(rowsr / float(treadrows))) + print("Read KB/s :", int(rowsr * rowsz / (treadrows * 1024))) + + # Show the dirt + if debug > 1: + dump_garbage() diff --git a/bench/stress-test3.py b/bench/stress-test3.py new file mode 100644 index 0000000..42630f5 --- /dev/null +++ b/bench/stress-test3.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python + +"""This script allows to create arbitrarily large files with the desired +combination of groups, tables per group and rows per table. 
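+
+For example, an invocation like
+
+    python stress-test3.py -g 5 -t 10 -i 1000 test.h5
+
+should create 5 groups hanging from the root, each holding 10 tables of
+1000 rows (the figures here are only illustrative).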
+ +Issue "python stress-test3.py" without parameters for a help on usage. + +""" + +from __future__ import print_function +import gc +import sys +import time +from tables import * + + +class Test(IsDescription): + ngroup = Int32Col(pos=1) + ntable = Int32Col(pos=2) + nrow = Int32Col(pos=3) + string = StringCol(500, pos=4) + + +def createFileArr(filename, ngroups, ntables, nrows): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + fileh.create_group("/", 'group%04d' % k, "Group %d" % k) + + fileh.close() + + return (0, 4) + + +def readFileArr(filename, ngroups, recsize, verbose): + + rowsread = 0 + for ngroup in range(ngroups): + fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup) + # Get the group + group = fileh.root + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, 'Array'): + if verbose > 1: + print("Array ==>", table) + print("Rows in", table._v_pathname, ":", table.shape) + + arr = table.read() + + rowsread += len(arr) + ntable += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, 4, 0) + + +def createFile(filename, ngroups, ntables, nrows, complevel, complib, recsize): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + group = fileh.create_group("/", 'group%04d' % k, "Group %d" % k) + + fileh.close() + + # Now, create the tables + rowswritten = 0 + for k in range(ngroups): + fileh = open_file(filename, mode="a", root_uep='group%04d' % k) + # Get the group + group = fileh.root + for j in range(ntables): + # Create a table + table = fileh.create_table(group, 'table%04d' % j, Test, + 'Table%04d' % j, + Filters(complevel, complib), nrows) + rowsize = table.rowsize + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(nrows): + row['ngroup'] = k + row['ntable'] = j + row['nrow'] = i + row.append() + + rowswritten += nrows + table.flush() + + # Close the file + fileh.close() + + return (rowswritten, rowsize) + + +def readFile(filename, ngroups, recsize, verbose): + # Open the HDF5 file in read-only mode + + rowsread = 0 + for ngroup in range(ngroups): + fileh = open_file(filename, mode="r", root_uep='group%04d' % ngroup) + # Get the group + group = fileh.root + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, 'Table'): + rowsize = table.rowsize + buffersize = table.rowsize * table.nrowsinbuf + if verbose > 1: + print("Table ==>", table) + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + nrow = 0 + for row in table: + try: + assert row["ngroup"] == ngroup + assert row["ntable"] == ntable + assert row["nrow"] == nrow + except: + print("Error in group: %d, table: %d, row: %d" % + (ngroup, ntable, nrow)) + print("Record ==>", row) + nrow += 1 + + assert nrow == table.nrows + rowsread += table.nrows + ntable += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, rowsize, buffersize) + + +def dump_garbage(): + """show us waht the garbage is about.""" + # Force collection + print("\nGARBAGE:") + gc.collect() + + print("\nGARBAGE 
OBJECTS:") + for x in gc.garbage: + s = str(x) + #if len(s) > 80: s = s[:77] + "..." + print(type(x), "\n ", s) + +if __name__ == "__main__": + import getopt + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file + -d debugging level + -v verbosity level + -p use "psyco" if available + -a use Array objects instead of Table + -r only read test + -w only write test + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -c sets a compression level (do not set it or 0 for no compression) + -g number of groups hanging from "/" + -t number of tables per group + -i number of rows per table +""" + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'd:v:parwl:c:g:t:i:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + ngroups = 5 + ntables = 5 + nrows = 100 + verbose = 0 + debug = 0 + recsize = "medium" + testread = 1 + testwrite = 1 + usepsyco = 0 + usearray = 0 + complevel = 0 + complib = "zlib" + + # Get the options + for option in opts: + if option[0] == '-d': + debug = int(option[1]) + if option[0] == '-v': + verbose = int(option[1]) + if option[0] == '-p': + usepsyco = 1 + if option[0] == '-a': + usearray = 1 + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-c': + complevel = int(option[1]) + elif option[0] == '-g': + ngroups = int(option[1]) + elif option[0] == '-t': + ntables = int(option[1]) + elif option[0] == '-i': + nrows = int(option[1]) + + if debug: + gc.enable() + gc.set_debug(gc.DEBUG_LEAK) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if testwrite: + t1 = time.time() + cpu1 = time.clock() + if psyco_imported and usepsyco: + psyco.bind(createFile) + if usearray: + (rowsw, rowsz) = createFileArr(file, ngroups, ntables, nrows) + else: + (rowsw, rowsz) = createFile(file, ngroups, ntables, nrows, + complevel, complib, recsize) + t2 = time.time() + cpu2 = time.clock() + tapprows = round(t2 - t1, 3) + cpuapprows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpuapprows / tapprows, 2) * 100) + print("Rows written:", rowsw, " Row size:", rowsz) + print("Time writing rows: %s s (real) %s s (cpu) %s%%" % + (tapprows, cpuapprows, tpercent)) + print("Write rows/sec: ", int(rowsw / float(tapprows))) + print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))) + + if testread: + t1 = time.time() + cpu1 = time.clock() + if psyco_imported and usepsyco: + psyco.bind(readFile) + if usearray: + (rowsr, rowsz, bufsz) = readFileArr(file, + ngroups, recsize, verbose) + else: + (rowsr, rowsz, bufsz) = readFile(file, ngroups, recsize, verbose) + t2 = time.time() + cpu2 = time.clock() + treadrows = round(t2 - t1, 3) + cpureadrows = round(cpu2 - cpu1, 3) + tpercent = int(round(cpureadrows / treadrows, 2) * 100) + print("Rows read:", rowsr, " Row size:", rowsz, "Buf size:", bufsz) + print("Time reading rows: %s s (real) %s s (cpu) %s%%" % + (treadrows, cpureadrows, tpercent)) + print("Read rows/sec: ", int(rowsr / float(treadrows))) + print("Read KB/s :", int(rowsr * rowsz / (treadrows * 1024))) + + # Show the dirt + if debug > 1: + dump_garbage() diff --git 
a/bench/table-bench.py b/bench/table-bench.py new file mode 100644 index 0000000..64d304b --- /dev/null +++ b/bench/table-bench.py @@ -0,0 +1,424 @@ +#!/usr/bin/env python + +from __future__ import print_function +import numpy as NP +from tables import * + +# This class is accessible only for the examples + + +class Small(IsDescription): + var1 = StringCol(itemsize=4, pos=2) + var2 = Int32Col(pos=1) + var3 = Float64Col(pos=0) + +# Define a user record to characterize some kind of particles + + +class Medium(IsDescription): + name = StringCol(itemsize=16, pos=0) # 16-character String + float1 = Float64Col(shape=2, dflt=NP.arange(2), pos=1) + #float1 = Float64Col(dflt=2.3) + #float2 = Float64Col(dflt=2.3) + # zADCcount = Int16Col() # signed short integer + ADCcount = Int32Col(pos=6) # signed short integer + grid_i = Int32Col(pos=7) # integer + grid_j = Int32Col(pos=8) # integer + pressure = Float32Col(pos=9) # float (single-precision) + energy = Float64Col(pos=2) # double (double-precision) + # unalig = Int8Col() # just to unalign data + +# Define a user record to characterize some kind of particles + + +class Big(IsDescription): + name = StringCol(itemsize=16) # 16-character String + float1 = Float64Col(shape=32, dflt=NP.arange(32)) + float2 = Float64Col(shape=32, dflt=2.2) + TDCcount = Int8Col() # signed short integer + #ADCcount = Int32Col() + # ADCcount = Int16Col() # signed short integer + grid_i = Int32Col() # integer + grid_j = Int32Col() # integer + pressure = Float32Col() # float (single-precision) + energy = Float64Col() # double (double-precision) + + +def createFile(filename, totalrows, filters, recsize): + + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="Table Benchmark", + filters=filters) + + # Table title + title = "This is the table title" + + # Create a Table instance + group = fileh.root + rowswritten = 0 + for j in range(3): + # Create a table + if recsize == "big": + table = fileh.create_table(group, 'tuple' + str(j), Big, title, + None, + totalrows) + elif recsize == "medium": + table = fileh.create_table(group, 'tuple' + str(j), Medium, title, + None, + totalrows) + elif recsize == "small": + table = fileh.create_table(group, 'tuple' + str(j), Small, title, + None, + totalrows) + else: + raise RuntimeError("This should never happen") + + table.attrs.test = 2 + rowsize = table.rowsize + # Get the row object associated with the new table + d = table.row + # Fill the table + if recsize == "big": + for i in range(totalrows): + # d['name'] = 'Part: %6d' % (i) + d['TDCcount'] = i % 256 + #d['float1'] = NP.array([i]*32, NP.float64) + #d['float2'] = NP.array([i**2]*32, NP.float64) + #d['float1'][0] = float(i) + #d['float2'][0] = float(i*2) + # Common part with medium + d['grid_i'] = i + d['grid_j'] = 10 - i + d['pressure'] = float(i * i) + # d['energy'] = float(d['pressure'] ** 4) + d['energy'] = d['pressure'] + # d['idnumber'] = i * (2 ** 34) + d.append() + elif recsize == "medium": + for i in range(totalrows): + #d['name'] = 'Part: %6d' % (i) + #d['float1'] = NP.array([i]*2, NP.float64) + #d['float1'] = arr + #d['float1'] = i + #d['float2'] = float(i) + # Common part with big: + d['grid_i'] = i + d['grid_j'] = 10 - i + d['pressure'] = i * 2 + # d['energy'] = float(d['pressure'] ** 4) + d['energy'] = d['pressure'] + d.append() + else: # Small record + for i in range(totalrows): + #d['var1'] = str(random.randrange(1000000)) + #d['var3'] = random.randrange(10000000) + d['var1'] = str(i) + #d['var2'] = random.randrange(totalrows) + d['var2'] = i + 
#d['var3'] = 12.1e10 + d['var3'] = totalrows - i + d.append() # This is a 10% faster than table.append() + rowswritten += totalrows + + if recsize == "small": + # Testing with indexing + pass +# table._createIndex("var3", Filters(1,"zlib",shuffle=1)) + + # table.flush() + group._v_attrs.test2 = "just a test" + # Create a new group + group2 = fileh.create_group(group, 'group' + str(j)) + # Iterate over this new group (group2) + group = group2 + table.flush() + + # Close the file (eventually destroy the extended type) + fileh.close() + return (rowswritten, rowsize) + + +def readFile(filename, recsize, verbose): + # Open the HDF5 file in read-only mode + + fileh = open_file(filename, mode="r") + rowsread = 0 + for groupobj in fileh.walk_groups(fileh.root): + # print "Group pathname:", groupobj._v_pathname + row = 0 + for table in fileh.list_nodes(groupobj, 'Table'): + rowsize = table.rowsize + print("reading", table) + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + if recsize == "big" or recsize == "medium": + # e = [ p.float1 for p in table.iterrows() + # if p.grid_i < 2 ] + #e = [ str(p) for p in table.iterrows() ] + # if p.grid_i < 2 ] +# e = [ p['grid_i'] for p in table.iterrows() +# if p['grid_j'] == 20 and p['grid_i'] < 20 ] +# e = [ p['grid_i'] for p in table +# if p['grid_i'] <= 2 ] +# e = [ p['grid_i'] for p in table.where("grid_i<=20")] +# e = [ p['grid_i'] for p in +# table.where('grid_i <= 20')] + e = [p['grid_i'] for p in + table.where('(grid_i <= 20) & (grid_j == 20)')] +# e = [ p['grid_i'] for p in table.iterrows() +# if p.nrow() == 20 ] +# e = [ table.delrow(p.nrow()) for p in table.iterrows() +# if p.nrow() == 20 ] + # The version with a for loop is only 1% better than + # comprenhension list + #e = [] + # for p in table.iterrows(): + # if p.grid_i < 20: + # e.append(p.grid_j) + else: # small record case +# e = [ p['var3'] for p in table.iterrows() +# if p['var2'] < 20 and p['var3'] < 20 ] +# e = [ p['var3'] for p in table.where("var3 <= 20") +# if p['var2'] < 20 ] +# e = [ p['var3'] for p in table.where("var3 <= 20")] +# Cuts 1) and 2) issues the same results but 2) is about 10 times faster +# Cut 1) +# e = [ p.nrow() for p in +# table.where(table.cols.var2 > 5) +# if p["var2"] < 10] +# Cut 2) +# e = [ p.nrow() for p in +# table.where(table.cols.var2 < 10) +# if p["var2"] > 5] +# e = [ (p._nrow,p["var3"]) for p in +# e = [ p["var3"] for p in +# table.where(table.cols.var3 < 10)] +# table.where(table.cols.var3 < 10)] +# table if p["var3"] <= 10] +# e = [ p['var3'] for p in table.where("var3 <= 20")] +# e = [ p['var3'] for p in +# table.where(table.cols.var1 == "10")] # More + # than ten times faster than the next one +# e = [ p['var3'] for p in table +# if p['var1'] == "10"] +# e = [ p['var3'] for p in table.where('var2 <= 20')] + e = [p['var3'] + for p in table.where('(var2 <= 20) & (var2 >= 3)')] + # e = [ p[0] for p in table.where('var2 <= 20')] + #e = [ p['var3'] for p in table if p['var2'] <= 20 ] + # e = [ p[:] for p in table if p[1] <= 20 ] +# e = [ p['var3'] for p in table._whereInRange(table.cols.var2 <=20)] + #e = [ p['var3'] for p in table.iterrows(0,21) ] +# e = [ p['var3'] for p in table.iterrows() +# if p.nrow() <= 20 ] + #e = [ p['var3'] for p in table.iterrows(1,0,1000)] + #e = [ p['var3'] for p in table.iterrows(1,100)] + # e = [ p['var3'] for p in table.iterrows(step=2) + # if p.nrow() < 20 ] 
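+                # (The live query above goes through Table.where(), which
+                # evaluates the condition in-kernel via numexpr instead of
+                # filtering row by row in Python; that is what makes it
+                # faster than the commented-out variants around here.)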
+ # e = [ p['var2'] for p in table.iterrows() + # if p['var2'] < 20 ] + # for p in table.iterrows(): + # pass + if verbose: + # print "Last record read:", p + print("resulting selection list ==>", e) + + rowsread += table.nrows + row += 1 + if verbose: + print("Total selected records ==> ", len(e)) + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, rowsize) + + +def readField(filename, field, rng, verbose): + fileh = open_file(filename, mode="r") + rowsread = 0 + if rng is None: + rng = [0, -1, 1] + if field == "all": + field = None + for groupobj in fileh.walk_groups(fileh.root): + for table in fileh.list_nodes(groupobj, 'Table'): + rowsize = table.rowsize + # table.nrowsinbuf = 3 # For testing purposes + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + print("(field, start, stop, step) ==>", (field, rng[0], rng[1], + rng[2])) + + e = table.read(rng[0], rng[1], rng[2], field) + + rowsread += table.nrows + if verbose: + print("Selected rows ==> ", e) + print("Total selected rows ==> ", len(e)) + + # Close the file (eventually destroy the extended type) + fileh.close() + return (rowsread, rowsize) + +if __name__ == "__main__": + import sys + import getopt + + try: + import psyco + psyco_imported = 1 + except: + psyco_imported = 0 + + import time + + usage = """usage: %s [-v] [-p] [-P] [-R range] [-r] [-w] [-s recsize] [-f field] [-c level] [-l complib] [-i iterations] [-S] [-F] file + -v verbose + -p use "psyco" if available + -P do profile + -R select a range in a field in the form "start,stop,step" + -r only read test + -w only write test + -s use [big] record, [medium] or [small] + -f only read stated field name in tables ("all" means all fields) + -c sets a compression level (do not set it or 0 for no compression) + -S activate shuffling filter + -F activate fletcher32 filter + -l sets the compression library to be used ("zlib", "lzo", "blosc", "bzip2") + -i sets the number of rows in each table\n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpPSFR:rwf:s:c:l:i:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + profile = 0 + rng = None + recsize = "medium" + fieldName = None + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + shuffle = 0 + fletcher32 = 0 + complib = "zlib" + iterations = 100 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + if option[0] == '-p': + usepsyco = 1 + if option[0] == '-P': + profile = 1 + if option[0] == '-S': + shuffle = 1 + if option[0] == '-F': + fletcher32 = 1 + elif option[0] == '-R': + rng = [int(i) for i in option[1].split(",")] + elif option[0] == '-r': + testwrite = 0 + elif option[0] == '-w': + testread = 0 + elif option[0] == '-f': + fieldName = option[1] + elif option[0] == '-s': + recsize = option[1] + if recsize not in ["big", "medium", "small"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == '-c': + complevel = int(option[1]) + elif option[0] == '-l': + complib = option[1] + elif option[0] == '-i': + iterations = int(option[1]) + + # Build the Filters instance + filters = Filters(complevel=complevel, complib=complib, + shuffle=shuffle, fletcher32=fletcher32) + + # Catch the hdf5 file passed as the last argument 
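+    # (Note that "file" shadows the Python 2 built-in of the same name;
+    # harmless in this script, but easy to trip over when modifying it.)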
+    file = pargs[0]
+
+    if verbose:
+        print("numpy version:", NP.__version__)
+        if psyco_imported and usepsyco:
+            print("Using psyco version:", psyco.version_info)
+
+    if testwrite:
+        print("Compression level:", complevel)
+        if complevel > 0:
+            print("Compression library:", complib)
+            if shuffle:
+                print("Shuffling...")
+        t1 = time.time()
+        cpu1 = time.clock()
+        if psyco_imported and usepsyco:
+            psyco.bind(createFile)
+        if profile:
+            import profile as prof
+            import pstats
+            prof.run(
+                '(rowsw, rowsz) = createFile(file, iterations, filters, '
+                'recsize)',
+                'table-bench.prof')
+            stats = pstats.Stats('table-bench.prof')
+            stats.strip_dirs()
+            stats.sort_stats('time', 'calls')
+            stats.print_stats(20)
+        else:
+            (rowsw, rowsz) = createFile(file, iterations, filters, recsize)
+        t2 = time.time()
+        cpu2 = time.clock()
+        tapprows = round(t2 - t1, 3)
+        cpuapprows = round(cpu2 - cpu1, 3)
+        tpercent = int(round(cpuapprows / tapprows, 2) * 100)
+        print("Rows written:", rowsw, " Row size:", rowsz)
+        print("Time writing rows: %s s (real) %s s (cpu) %s%%" %
+              (tapprows, cpuapprows, tpercent))
+        print("Write rows/sec: ", int(rowsw / float(tapprows)))
+        print("Write KB/s :", int(rowsw * rowsz / (tapprows * 1024)))
+
+    if testread:
+        t1 = time.time()
+        cpu1 = time.clock()
+        if psyco_imported and usepsyco:
+            psyco.bind(readFile)
+            # psyco.bind(readField)
+        if rng or fieldName:
+            (rowsr, rowsz) = readField(file, fieldName, rng, verbose)
+        else:
+            for i in range(1):
+                (rowsr, rowsz) = readFile(file, recsize, verbose)
+        t2 = time.time()
+        cpu2 = time.clock()
+        treadrows = round(t2 - t1, 3)
+        cpureadrows = round(cpu2 - cpu1, 3)
+        tpercent = int(round(cpureadrows / treadrows, 2) * 100)
+        print("Rows read:", rowsr, " Row size:", rowsz)
+        print("Time reading rows: %s s (real) %s s (cpu) %s%%" %
+              (treadrows, cpureadrows, tpercent))
+        print("Read rows/sec: ", int(rowsr / float(treadrows)))
+        print("Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)))
diff --git a/bench/table-copy.py b/bench/table-copy.py
new file mode 100644
index 0000000..c249cdf
--- /dev/null
+++ b/bench/table-copy.py
@@ -0,0 +1,116 @@
+from __future__ import print_function
+import time
+
+import numpy as np
+import tables
+
+N = 144000
+#N = 144
+
+
+def timed(func, *args, **kwargs):
+    start = time.time()
+    res = func(*args, **kwargs)
+    print("%fs elapsed." % (time.time() - start))
+    return res
+
+
+def create_table(output_path):
+    print("creating array...", end=' ')
+    dt = np.dtype([('field%d' % i, int) for i in range(320)])
+    a = np.zeros(N, dtype=dt)
+    print("done.")
+
+    output_file = tables.open_file(output_path, mode="w")
+    table = output_file.create_table("/", "test", dt)  # , filters=blosc4)
+    print("appending data...", end=' ')
+    table.append(a)
+    print("flushing...", end=' ')
+    table.flush()
+    print("done.")
+    output_file.close()
+
+
+def copy1(input_path, output_path):
+    print("copying data from %s to %s..." % (input_path, output_path))
+    input_file = tables.open_file(input_path, mode="r")
+    output_file = tables.open_file(output_path, mode="w")
+
+    # copy nodes as a batch
+    input_file.copy_node("/", output_file.root, recursive=True)
+    output_file.close()
+    input_file.close()
+
+
+def copy2(input_path, output_path):
+    print("copying data from %s to %s..." % (input_path, output_path))
+    input_file = tables.open_file(input_path, mode="r")
+    input_file.copy_file(output_path, overwrite=True)
+    input_file.close()
+
+
+def copy3(input_path, output_path):
+    print("copying data from %s to %s..." % (input_path, output_path))
+    input_file = tables.open_file(input_path, mode="r")
+    output_file = tables.open_file(output_path, mode="w")
+    table = input_file.root.test
+    table.copy(output_file.root)
+    output_file.close()
+    input_file.close()
+
+
+def copy4(input_path, output_path, complib='zlib', complevel=0):
+    print("copying data from %s to %s..." % (input_path, output_path))
+    input_file = tables.open_file(input_path, mode="r")
+    output_file = tables.open_file(output_path, mode="w")
+
+    input_table = input_file.root.test
+    print("reading data...", end=' ')
+    data = input_file.root.test.read()
+    print("done.")
+
+    filters = tables.Filters(complevel=complevel, complib=complib)
+    output_table = output_file.create_table("/", "test", input_table.dtype,
+                                            filters=filters)
+    print("appending data...", end=' ')
+    output_table.append(data)
+    print("flushing...", end=' ')
+    output_table.flush()
+    print("done.")
+
+    input_file.close()
+    output_file.close()
+
+
+def copy5(input_path, output_path, complib='zlib', complevel=0):
+    print("copying data from %s to %s..." % (input_path, output_path))
+    input_file = tables.open_file(input_path, mode="r")
+    output_file = tables.open_file(output_path, mode="w")
+
+    input_table = input_file.root.test
+
+    filters = tables.Filters(complevel=complevel, complib=complib)
+    output_table = output_file.create_table("/", "test", input_table.dtype,
+                                            filters=filters)
+    chunksize = 10000
+    rowsleft = len(input_table)
+    start = 0
+    # use integer division so this also works under Python 3
+    for chunk in range((len(input_table) // chunksize) + 1):
+        stop = start + min(chunksize, rowsleft)
+        data = input_table.read(start, stop)
+        output_table.append(data)
+        output_table.flush()
+        rowsleft -= chunksize
+        start = stop
+
+    input_file.close()
+    output_file.close()
+
+
+if __name__ == '__main__':
+    timed(create_table, 'tmp.h5')
+#    timed(copy1, 'tmp.h5', 'test1.h5')
+    timed(copy2, 'tmp.h5', 'test2.h5')
+#    timed(copy3, 'tmp.h5', 'test3.h5')
+    timed(copy4, 'tmp.h5', 'test4.h5')
+    timed(copy5, 'tmp.h5', 'test5.h5')
diff --git a/bench/undo_redo.py b/bench/undo_redo.py
new file mode 100644
index 0000000..e1a1932
--- /dev/null
+++ b/bench/undo_redo.py
@@ -0,0 +1,234 @@
+###########################################################################
+# Benchmark for undo/redo.  Run this program without parameters
+# for usage instructions.
+# +# Francesc Alted +# 2005-03-09 +########################################################################### + +from __future__ import print_function +import numpy +from time import time +import tables + +verbose = 0 + + +class BasicBenchmark(object): + + def __init__(self, filename, testname, vecsize, nobjects, niter): + + self.file = filename + self.test = testname + self.vecsize = vecsize + self.nobjects = nobjects + self.niter = niter + + # Initialize the arrays + self.a1 = numpy.arange(0, 1 * self.vecsize) + self.a2 = numpy.arange(1 * self.vecsize, 2 * self.vecsize) + self.a3 = numpy.arange(2 * self.vecsize, 3 * self.vecsize) + + def setUp(self): + + # Create an HDF5 file + self.fileh = tables.open_file(self.file, mode="w") + # open the do/undo + self.fileh.enable_undo() + + def tearDown(self): + self.fileh.disable_undo() + self.fileh.close() + # Remove the temporary file + # os.remove(self.file) + + def createNode(self): + """Checking a undo/redo create_array.""" + + for i in range(self.nobjects): + # Create a new array + self.fileh.create_array('/', 'array' + str(i), self.a1) + # Put a mark + self.fileh.mark() + # Unwind all marks sequentially + for i in range(self.niter): + t1 = time() + for i in range(self.nobjects): + self.fileh.undo() + if verbose: + print("u", end=' ') + if verbose: + print() + undo = time() - t1 + # Rewind all marks sequentially + t1 = time() + for i in range(self.nobjects): + self.fileh.redo() + if verbose: + print("r", end=' ') + if verbose: + print() + redo = time() - t1 + + print("Time for Undo, Redo (createNode):", undo, "s, ", redo, "s") + + def copy_children(self): + """Checking a undo/redo copy_children.""" + + # Create a group + self.fileh.create_group('/', 'agroup') + # Create several objects there + for i in range(10): + # Create a new array + self.fileh.create_array('/agroup', 'array' + str(i), self.a1) + # Excercise copy_children + for i in range(self.nobjects): + # Create another group for destination + self.fileh.create_group('/', 'anothergroup' + str(i)) + # Copy children from /agroup to /anothergroup+i + self.fileh.copy_children('/agroup', '/anothergroup' + str(i)) + # Put a mark + self.fileh.mark() + # Unwind all marks sequentially + for i in range(self.niter): + t1 = time() + for i in range(self.nobjects): + self.fileh.undo() + if verbose: + print("u", end=' ') + if verbose: + print() + undo = time() - t1 + # Rewind all marks sequentially + t1 = time() + for i in range(self.nobjects): + self.fileh.redo() + if verbose: + print("r", end=' ') + if verbose: + print() + redo = time() - t1 + + print(("Time for Undo, Redo (copy_children):", undo, "s, ", + redo, "s")) + + def set_attr(self): + """Checking a undo/redo for setting attributes.""" + + # Create a new array + self.fileh.create_array('/', 'array', self.a1) + for i in range(self.nobjects): + # Set an attribute + setattr(self.fileh.root.array.attrs, "attr" + str(i), str(self.a1)) + # Put a mark + self.fileh.mark() + # Unwind all marks sequentially + for i in range(self.niter): + t1 = time() + for i in range(self.nobjects): + self.fileh.undo() + if verbose: + print("u", end=' ') + if verbose: + print() + undo = time() - t1 + # Rewind all marks sequentially + t1 = time() + for i in range(self.nobjects): + self.fileh.redo() + if verbose: + print("r", end=' ') + if verbose: + print() + redo = time() - t1 + + print("Time for Undo, Redo (set_attr):", undo, "s, ", redo, "s") + + def runall(self): + + if testname == "all": + tests = [self.createNode, self.copy_children, self.set_attr] + elif 
testname == "createNode": + tests = [self.createNode] + elif testname == "copy_children": + tests = [self.copy_children] + elif testname == "set_attr": + tests = [self.set_attr] + for meth in tests: + self.setUp() + meth() + self.tearDown() + + +if __name__ == '__main__': + import sys + import getopt + + usage = """usage: %s [-v] [-p] [-t test] [-s vecsize] [-n niter] datafile + -v verbose (total dump of profiling) + -p do profiling + -t {createNode|copy_children|set_attr|all} run the specified test + -s the size of vectors that are undone/redone + -n number of objects in operations + -i number of iterations for reading\n""" % sys.argv[0] + + try: + opts, pargs = getopt.getopt(sys.argv[1:], 'vpt:s:n:i:') + except: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + profile = 0 + testname = "all" + vecsize = 10 + nobjects = 1 + niter = 1 + + # Get the options + for option in opts: + if option[0] == '-v': + verbose = 1 + elif option[0] == '-p': + profile = 1 + elif option[0] == '-t': + testname = option[1] + if testname not in ['createNode', 'copy_children', 'set_attr', + 'all']: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == '-s': + vecsize = int(option[1]) + elif option[0] == '-n': + nobjects = int(option[1]) + elif option[0] == '-i': + niter = int(option[1]) + + filename = pargs[0] + + bench = BasicBenchmark(filename, testname, vecsize, nobjects, niter) + if profile: + import hotshot + import hotshot.stats + prof = hotshot.Profile("do_undo.prof") + prof.runcall(bench.runall) + prof.close() + stats = hotshot.stats.load("do_undo.prof") + stats.strip_dirs() + stats.sort_stats('time', 'calls') + if verbose: + stats.print_stats() + else: + stats.print_stats(20) + else: + bench.runall() + +# Local Variables: +# mode: python +# End: diff --git a/bench/undo_redo.txt b/bench/undo_redo.txt new file mode 100644 index 0000000..8f0890e --- /dev/null +++ b/bench/undo_redo.txt @@ -0,0 +1,103 @@ +Benchmarks on PyTables Undo/Redo +================================ + +This is a small report for the performance of the Undo/Redo feature in +PyTables. + +A small script (see undo_redo.py) has been made in order to check +different scenarios for Undo/Redo, like creating single nodes, copying +children from one group to another, and creating attributes. + +Undo/Redo is independent of object size +--------------------------------------- + +Firstly, one thing to be noted is that the Undo/Redo feature is +independent of the object size that is being treated. For example, the +times for 10 objects (flag -n) each one with 10 elements (flag -s) is: + +$ time python2.4 undo_redo.py -n 10 -i 2 -s 10 data.nobackup/undo_redo.h5 +Time for Undo, Redo (createNode): 0.213686943054 s, 0.0727670192719 s +Time for Undo, Redo (createNode): 0.271666049957 s, 0.0740389823914 s +Time for Undo, Redo (copy_children): 0.296227931976 s, 0.161941051483 s +Time for Undo, Redo (copy_children): 0.363519906998 s, 0.162662982941 s +Time for Undo, Redo (set_attr): 0.208750009537 s, 0.0732419490814 s +Time for Undo, Redo (set_attr): 0.27628993988 s, 0.0736088752747 s + +real 0m5.557s +user 0m4.354s +sys 0m0.729s + +Note how all tests take more or less the same amount of time. This is +because a move operation is used as a central tool to implement the +Undo/Redo feature. Such a move operation has a constant cost, +independently of the size of the objects. 
+For example, using objects with 1000 elements, we can see that this
+does not affect the Undo/Redo speed:
+
+$ time python2.4 undo_redo.py -n 10 -i 2 -s 1000 data.nobackup/undo_redo.h5
+Time for Undo, Redo (createNode): 0.213760137558 s, 0.0717759132385 s
+Time for Undo, Redo (createNode): 0.276151895523 s, 0.0724079608917 s
+Time for Undo, Redo (copy_children): 0.308417797089 s, 0.168260812759 s
+Time for Undo, Redo (copy_children): 0.382102966309 s, 0.168042898178 s
+Time for Undo, Redo (set_attr): 0.209735155106 s, 0.0740969181061 s
+Time for Undo, Redo (set_attr): 0.279798984528 s, 0.0770981311798 s
+
+real 0m5.835s
+user 0m4.585s
+sys 0m0.736s
+
+
+Undo/Redo times grow linearly with the number of objects involved
+------------------------------------------------------------------
+
+Second, the time for undoing/redoing is, as expected, linearly
+proportional to the number of objects involved in the process (set by
+-n):
+
+$ time python2.4 undo_redo.py -n 100 -i 2 -s 10 data.nobackup/undo_redo.h5
+Time for Undo, Redo (createNode): 2.27267885208 s, 0.779091119766 s
+Time for Undo, Redo (createNode): 2.31264209747 s, 0.766252040863 s
+Time for Undo, Redo (copy_children): 3.01871585846 s, 1.63346219063 s
+Time for Undo, Redo (copy_children): 3.07704997063 s, 1.62615203857 s
+Time for Undo, Redo (set_attr): 2.18017196655 s, 0.809293985367 s
+Time for Undo, Redo (set_attr): 2.23039293289 s, 0.809432029724 s
+
+real 0m48.395s
+user 0m40.385s
+sys 0m6.914s
+
+
+A note on actual performance and room for improvement
+------------------------------------------------------
+
+Finally, note how the Undo/Redo capability of PyTables is pretty
+fast. The next benchmark performs 1000 undos and 1000 redos of
+create_array:
+
+$ time python2.4 undo_redo.py -n 1000 -i 2 -t createNode -s 1000 data.nobackup/undo_redo.h5
+Time for Undo, Redo (createNode): 22.7840828896 s, 7.9872610569 s
+Time for Undo, Redo (createNode): 22.2799329758 s, 7.95833396912 s
+
+real 1m32.307s
+user 1m16.598s
+sys 0m15.105s
+
+i.e. an undo takes approximately 23 milliseconds, while a redo takes
+about 8 milliseconds.
+
+The fact that undo operations take about 3 times longer than redos is
+probably due to how the action log is implemented. The action log has
+been implemented as a Table object, and PyTables has been optimized to
+read rows of tables in the *forward* direction (the one needed for
+redo operations). However, when looking in the *backward* direction
+(needed for undo operations), the internal cache of PyTables is
+counterproductive and makes look-ups quite slow (compared with forward
+access). Nevertheless, the code for Undo/Redo has been optimized quite
+a bit to smooth this kind of access as much as possible, but with only
+relative success. A more definitive optimization would involve getting
+much better performance for reading tables in the backward direction.
+That would be a major task, and may eventually be addressed in the
+future.
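+
+
+Appendix: the pattern being benchmarked
+---------------------------------------
+
+For reference, this is a minimal sketch of the Undo/Redo calls that
+undo_redo.py times above (the file name is arbitrary):
+
+  import numpy
+  import tables
+
+  fileh = tables.open_file("undo_redo.h5", mode="w")
+  fileh.enable_undo()                  # start logging actions
+  fileh.create_array('/', 'array0', numpy.arange(10))
+  mark = fileh.mark()                  # remember this state
+  fileh.create_array('/', 'array1', numpy.arange(10))
+  fileh.undo(mark)                     # move back: /array1 disappears
+  assert '/array1' not in fileh
+  fileh.redo()                         # move forward: /array1 is back
+  assert '/array1' in fileh
+  fileh.disable_undo()
+  fileh.close()
+
+
+Francesc Alted
+2005-03-10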
diff --git a/bench/widetree.py b/bench/widetree.py
new file mode 100644
index 0000000..c5f6651
--- /dev/null
+++ b/bench/widetree.py
@@ -0,0 +1,127 @@
+from __future__ import print_function
+import hotshot
+import hotshot.stats
+
+import unittest
+import os
+import tempfile
+
+from tables import *
+
+verbose = 0
+
+
+class WideTreeTestCase(unittest.TestCase):
+    """Checks for the maximum number of children of a Group."""
+
+    def test00_Leafs(self):
+        """Checking creation of a large number of leaves (1024) per group.
+
+        Variable 'maxchilds' controls this check.  PyTables supports up
+        to 4096 children per group, but this would take too much memory
+        (up to 64 MB) for testing purposes (maybe we can add a test for
+        big platforms).  A run with 1024 children takes up to 30 MB; a
+        test with 512 children takes around 25 MB.
+
+        """
+
+        import time
+        maxchilds = 1000
+        if verbose:
+            print('\n', '-=' * 30)
+            print("Running %s.test00_wideTree..." % self.__class__.__name__)
+            print("Maximum number of childs tested :", maxchilds)
+        # Open a new empty HDF5 file
+        #file = tempfile.mktemp(".h5")
+        file = "test_widetree.h5"
+
+        fileh = open_file(file, mode="w")
+        if verbose:
+            print("Children writing progress: ", end=' ')
+        for child in range(maxchilds):
+            if verbose:
+                print("%3d," % (child), end=' ')
+            a = [1, 1]
+            fileh.create_group(fileh.root, 'group' + str(child),
+                               "child: %d" % child)
+            fileh.create_array("/group" + str(child), 'array' + str(child),
+                               a, "child: %d" % child)
+        if verbose:
+            print()
+        # Close the file
+        fileh.close()
+
+        t1 = time.time()
+        # Open the previous HDF5 file in read-only mode
+        fileh = open_file(file, mode="r")
+        print(("\nTime spent opening a file with %d groups + %d arrays: "
+               "%s s" % (maxchilds, maxchilds, time.time() - t1)))
+        if verbose:
+            print("\nChildren reading progress: ", end=' ')
+        # Close the file
+        fileh.close()
+        # Then, delete the file
+        # os.remove(file)
+
+    def test01_wideTree(self):
+        """Checking creation of a large number of groups (1024) per group.
+
+        Variable 'maxchilds' controls this check.  PyTables supports up
+        to 4096 children per group, but this would take too much memory
+        (up to 64 MB) for testing purposes (maybe we can add a test for
+        big platforms).  A run with 1024 children takes up to 30 MB; a
+        test with 512 children takes around 25 MB.
+
+        """
+
+        import time
+        maxchilds = 1000
+        if verbose:
+            print('\n', '-=' * 30)
+            print("Running %s.test01_wideTree..."
% self.__class__.__name__) + print("Maximum number of childs tested :", maxchilds) + # Open a new empty HDF5 file + file = tempfile.mktemp(".h5") + #file = "test_widetree.h5" + + fileh = open_file(file, mode="w") + if verbose: + print("Children writing progress: ", end=' ') + for child in range(maxchilds): + if verbose: + print("%3d," % (child), end=' ') + fileh.create_group(fileh.root, 'group' + str(child), + "child: %d" % child) + if verbose: + print() + # Close the file + fileh.close() + + t1 = time.time() + # Open the previous HDF5 file in read-only mode + fileh = open_file(file, mode="r") + print("\nTime spent opening a file with %d groups: %s s" % + (maxchilds, time.time() - t1)) + # Close the file + fileh.close() + # Then, delete the file + os.remove(file) + +#---------------------------------------------------------------------- + + +def suite(): + theSuite = unittest.TestSuite() + theSuite.addTest(unittest.makeSuite(WideTreeTestCase)) + + return theSuite + + +if __name__ == '__main__': + prof = hotshot.Profile("widetree.prof") + benchtime, stones = prof.runcall(unittest.main(defaultTest='suite')) + prof.close() + stats = hotshot.stats.load("widetree.prof") + stats.strip_dirs() + stats.sort_stats('time', 'calls') + stats.print_stats(20) diff --git a/bench/widetree2.py b/bench/widetree2.py new file mode 100644 index 0000000..34f044e --- /dev/null +++ b/bench/widetree2.py @@ -0,0 +1,115 @@ +from __future__ import print_function +import unittest + +from tables import * +# Next imports are only necessary for this test suite +#from tables import Group, Leaf, Table, Array + +verbose = 0 + + +class Test(IsDescription): + ngroup = Int32Col(pos=1) + ntable = Int32Col(pos=2) + nrow = Int32Col(pos=3) + #string = StringCol(itemsize=500, pos=4) + + +class WideTreeTestCase(unittest.TestCase): + + def test00_Leafs(self): + + # Open a new empty HDF5 file + filename = "test_widetree.h5" + ngroups = 10 + ntables = 300 + nrows = 10 + complevel = 0 + complib = "lzo" + + print("Writing...") + # Open a file in "w"rite mode + fileh = open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + group = fileh.create_group("/", 'group%04d' % k, "Group %d" % k) + + fileh.close() + + # Now, create the tables + rowswritten = 0 + for k in range(ngroups): + print("Filling tables in group:", k) + fileh = open_file(filename, mode="a", root_uep='group%04d' % k) + # Get the group + group = fileh.root + for j in range(ntables): + # Create a table + table = fileh.create_table(group, 'table%04d' % j, Test, + 'Table%04d' % j, + Filters(complevel, complib), nrows) + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(nrows): + row['ngroup'] = k + row['ntable'] = j + row['nrow'] = i + row.append() + + rowswritten += nrows + table.flush() + + # Close the file + fileh.close() + + # read the file + print("Reading...") + rowsread = 0 + for ngroup in range(ngroups): + fileh = open_file(filename, mode="r", root_uep='group%04d' % + ngroup) + # Get the group + group = fileh.root + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, 'Table'): + if verbose > 1: + print("Table ==>", table) + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + nrow = 0 + for row in table: + try: + assert row["ngroup"] == ngroup + assert row["ntable"] == ntable + 
assert row["nrow"] == nrow + except: + print("Error in group: %d, table: %d, row: %d" % + (ngroup, ntable, nrow)) + print("Record ==>", row) + nrow += 1 + + assert nrow == table.nrows + rowsread += table.nrows + ntable += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + +#---------------------------------------------------------------------- +def suite(): + theSuite = unittest.TestSuite() + theSuite.addTest(unittest.makeSuite(WideTreeTestCase)) + + return theSuite + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/bench/woody-pentiumIV.txt b/bench/woody-pentiumIV.txt new file mode 100644 index 0000000..a82baaf --- /dev/null +++ b/bench/woody-pentiumIV.txt @@ -0,0 +1,189 @@ +This is for Debian woody! + +Below are some benchmarking figures obtained while reading and writing +to a file with three tables, each table containing 10000 records. For +reference, the same tests have been repeated using the shelve module +that comes with Python. The tests were conducted on a platform with a +2 GHz AMD Athlon chip, an IDE disk at 4600 rpm, and 256 MB of RAM. + +Version 0.2 + + | medium size records | small size records + | (47 Bytes) | (16 Bytes) + +---------------------------+------------------------------ + | rows/s filesize | rows/s filesize + | write read | write read +------------+---------------------------+------------------------------ + no compress| | + record | 24400 39000 1184 KB | 32600 52600 506 KB + tupla | 17100 81100 1184 KB | 66666 107142 506 KB +------------+---------------------------+------------------------------ + compress | | + record | 22200 37500 494 KB | 31900 51700 94 KB + tupla | 16100 75000 494 KB | 63900 107142 94 KB +------------+---------------------------+------------------------------ + Shelve | 25800 14400 2500 KB | 68200 17000 921 KB + +New version (15-Jan-2003) + + + PyTables pre-0.3 + +Rec length | rows/s | KB/s | rows | filesz | memory | + | write read | write read | | (MB) | (MB) | +------------+-----------------+-----------------+-------+--------+--------+ + 16 B | 31000 166600 | 480 2600 | 3.e4 | 0.49| 6.5 | +------------+-----------------+-----------------+-------+--------+--------+ + 56 B | 17300 136000 | 942 7460 | 3.e4 | 1.7 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 56 B* | 1560 136000 | 85 7560 | 3.e4 | 1.7 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 64 B* | 1540 130000 | 96 8152 | 3.e4 | 1.9 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B* | 879 81100 | 472 43500 | 3.e4 | 19 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B** | 12000 103000 | 6440 55400 | 3.e5 | 168 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B** | 15500 81100 | 8350 43500 | 3.e4 | 19 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B**c| 909 1100 | 490 1081 | 3.e4 | 0.76| 8.5 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B***| 3600 81100 | 1950 43500 | 3.e4 | 19 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + +* These are figures obtained with a numarray as part of the record +** The numarray record fields are not set in each iteration +*** Some numarray elements of a record field are changed on each iteration +**c Like ** but with compression (level 1) + + +New version (10-March-2003) + 
+ PyTables pre-0.4 + +Rec | rows/s | KB/s | rows | filesz | memory |%CP|%CP +length | write read | write read | | (MB) | (MB) |(w)|(r) +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |434000 469000 | 6800 7300 | 3.e4 | 0.49| 6.5 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 Bc |326000 435000 | 5100 6800 | 3.e4 | 0.12| 6.5 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |663000 728000 | 10400 11400 | 3.e5 | 4.7 | 7.0 | 99|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |679000 797000 | 10600 12500 | 3.e6 | 46.0 | 10.0 | 98| 98 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 Bc |452000 663000 | 7100 10400 | 3.e6 | 9.3 | 10.0 | 98| 98 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |576000 590000 | 9000 9200 | 3.e7 | 458.0 | 11.0 | 78| 76 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 3050 380000 | 163 20700 | 3.e4 | 1.7 | 7.2 | 98|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |194000 340000 | 10600 18600 | 3.e4 | 1.7 | 7.2 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B*c |142000 306000 | 7800 16600 | 3.e4 | 0.3 | 7.2 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |273600 589000 | 14800 32214 | 3.e5 | 16.0 | 9.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B*c |184000 425000 | 10070 23362 | 3.e5 | 2.7 | 9.7 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |203600 649000 | 11100 35500 | 3.e6 | 161.0 | 12.0 | 72| 99 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |184000 229000 | 10000 12500 | 1.e7 | 534.0 | 17.0 | 56| 40 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B*np|184000 229000 | 10000 12500 | 1.e7 | 534.0 | 17.0 | 56| 40 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B | 2230 143000 | 1195 76600 | 3.e4 | 19 | 9.4 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B* | 76000 250000 | 40900 134000 | 3.e4 | 19 | 9.4 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B*c | 13900 30000 | 7400 16100 | 3.e4 | 0.7 | 10.0 | 99| 99 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B* | 25400 325000 | 13600 174000 | 3.e5 | 167 | 11.0 | 71| 96 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B* | 18700 28000 | 10000 15100 | 6.e5 | 322 | 13.0 | 76| 9 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B*c | 7300 21000 | 3900 11300 | 6.e5 | 11 | 17.0 | 98| 99 +--------+-----------------+-----------------+-------+--------+--------+---+---- + +* These are figures obtained with a numarray as part of the record +** The numarray record fields are not set in each iteration +c With compression (level 1) +np No psyco optimizations + + Shelve + +Rec length | rows/s | KB/s | rows | filesz | memory | + | write read | write read | | (MB) | (MB) | +------------+-----------------+-----------------+-------+--------+--------+ + 16 B | 68200 17000 | 1070 266 | 3.e4 | 
0.94| 7.2 |
+------------+-----------------+-----------------+-------+--------+--------+
+ 56 B | 25000 14400 | 1367 784 | 3.e4 | 2.5 | 10.6 |
+------------+-----------------+-----------------+-------+--------+--------+
+ 56 B* | 2980 2710 | 162 148 | 3.e4 | 7.3 | 33 |
+------------+-----------------+-----------------+-------+--------+--------+
+ 64 B* | 2900 2700 | 182 168 | 3.e4 | 7.5 | 33 |
+------------+-----------------+-----------------+-------+--------+--------+
+ 550 B* | 1090 1310 | 590 710 | 3.e4 | 58 | 122 |
+------------+-----------------+-----------------+-------+--------+--------+
+ 550 B** | 16000 14900 | 2400 1200 | 3.e4 | 2.4 | 10.6 |
+------------+-----------------+-----------------+-------+--------+--------+
+ 550 B***| 28000 11900 | 2400 1100 | 3.e4 | 2.5 | 10.6 |
+------------+-----------------+-----------------+-------+--------+--------+
+
+* These are figures obtained with a numarray as part of the record
+** The numarray record fields are not set on each iteration
+*** Some numarray elements of a record field are changed on each iteration
+
+
+        Python cPickle & bsddb3 RECNO with variable length
+
+Rec | Krows/s | MB/s | Krows | filesz | memory |%CP|%CP
+length | write read | write read | | (MB) | (MB) |(w)|(r)
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 23.0 4.3 | 0.65 0.12 | 30 | 2.3 | 6.0 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 22.0 4.3 | 0.60 0.12 | 300 | 24 | 25.0 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 12.3 2.0 | 0.68 0.11 | 30 | 5.8 | 6.2 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 8.8 2.0 | 0.44 0.11 | 300 | 61 | 6.2 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+
+
+        Python struct & bsddb3 RECNO with fixed length
+
+Rec | Krows/s | MB/s | Krows | filesz | memory |%CP|%CP
+length | write read | write read | | (MB) | (MB) |(w)|(r)
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 61 71 | 1.6 1.9 | 30 | 1.0 | 5.0 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 56 65 | 1.5 1.8 | 300 | 10 | 5.8 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 51 61 | 1.4 1.6 | 3000 | 100 | 6.1 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 51 52 | 2.7 2.8 | 30 | 1.8 | 5.8 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 18 50 | 1.0 2.7 | 300 | 18 | 6.2 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 16 48 | 0.9 2.6 | 1000 | 61 | 6.5 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+
+
+        PySQLite
+
+Rec | rows/s | KB/s | rows | filesz | memory |%CP|%CP
+length | write read | write read | | (MB) | (MB) |(w)|(r)
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 4290 1400000 | 200 48000 | 3.e4 | 1.4 | 5.0 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 3660 1030000 | 182 51000 | 3.e5 | 15 | 5.0 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 16 B | 3580 230000 | 192 12380 | 6.e6 | 322 | 5.0 |100| 25
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 2990 882000 | 250 76000 | 3.e4 | 2.6 | 5.0 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 2900 857000 | 270 80000 | 3.e5 | 28 | 5.0 |100|100
+--------+-----------------+-----------------+-------+--------+--------+---+----
+ 56 B | 2900 120000 | 302 13100 | 3.e6 | 314 | 5.0 |100| 11
+--------+-----------------+-----------------+-------+--------+--------+---+----
+
diff --git a/c-blosc/ANNOUNCE.rst b/c-blosc/ANNOUNCE.rst
new file mode 100644
index 0000000..119a796
--- /dev/null
+++ b/c-blosc/ANNOUNCE.rst
@@ -0,0 +1,54 @@
+===============================================================
+ Announcing C-Blosc 1.14.3
+ A blocking, shuffling and lossless compression library for C
+===============================================================
+
+What is new?
+============
+
+The main change for this release is that the pthreads library is no
+longer needed on any Windows build. Thanks to Steven G. Johnson.
+Also, the internal Zstd sources have been updated to 1.3.4; expect
+a noticeable performance boost (of up to 10%, especially at low
+compression levels).
+
+For more info, please see the release notes in:
+
+https://github.com/Blosc/c-blosc/blob/master/RELEASE_NOTES.rst
+
+
+What is it?
+===========
+
+Blosc (http://www.blosc.org) is a high performance meta-compressor
+optimized for binary data. It has been designed to transmit data to
+the processor cache faster than the traditional, non-compressed,
+direct memory fetch approach via a memcpy() call.
+
+Blosc has internal support for several compressors: its own BloscLZ,
+plus LZ4, LZ4HC, Snappy, Zlib and Zstd. All of them can automatically
+leverage the multithreading and pre-filtering (shuffling)
+capabilities that come with Blosc.
+
+
+Download sources
+================
+
+The GitHub repository is here:
+
+https://github.com/Blosc
+
+Blosc is distributed using the BSD license; see LICENSES/BLOSC.txt for
+details.
+
+
+Mailing list
+============
+
+There is an official Blosc mailing list at:
+
+blosc@googlegroups.com
+http://groups.google.es/group/blosc
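+
+
+Quick example
+=============
+
+As a minimal sketch of the idea (shown here from Python through the
+python-blosc wrapper, assuming it is installed; the C API offers the
+equivalent blosc_compress() and blosc_decompress() calls)::
+
+  import numpy
+  import blosc
+
+  a = numpy.arange(1000000, dtype="int64")
+  src = a.tobytes()
+  # shuffle + a fast codec is usually a good speed/ratio trade-off
+  packed = blosc.compress(src, typesize=8, clevel=5,
+                          shuffle=blosc.SHUFFLE, cname="lz4")
+  assert blosc.decompress(packed) == src
+
+
+Enjoy Data!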
diff --git a/c-blosc/CMakeLists.txt b/c-blosc/CMakeLists.txt new file mode 100644 index 0000000..8be39d1 --- /dev/null +++ b/c-blosc/CMakeLists.txt @@ -0,0 +1,358 @@ +# CMake build system for Blosc +# ============================ +# +# Available options: +# +# BUILD_STATIC: default ON +# build the static version of the Blosc library +# BUILD_SHARED: default ON +# build the shared library version of the Blosc library +# BUILD_TESTS: default ON +# build test programs and generates the "test" target +# BUILD_BENCHMARKS: default ON +# build the benchmark program +# DEACTIVATE_AVX2: default OFF +# do not attempt to build with AVX2 instructions +# DEACTIVATE_LZ4: default OFF +# do not include support for the LZ4 library +# DEACTIVATE_SNAPPY: default OFF +# do not include support for the Snappy library +# DEACTIVATE_ZLIB: default OFF +# do not include support for the Zlib library +# DEACTIVATE_ZSTD: default OFF +# do not include support for the Zstd library +# PREFER_EXTERNAL_LZ4: default OFF +# when found, use the installed LZ4 libs instead of included +# sources +# PREFER_EXTERNAL_SNAPPY: default OFF +# when found, use the installed Snappy libs instead of included +# sources +# PREFER_EXTERNAL_ZLIB: default OFF +# when found, use the installed zlib libs instead of included +# sources +# PREFER_EXTERNAL_ZSTD: default OFF +# when found, use the installed zstd libs instead of included +# sources +# TEST_INCLUDE_BENCH_SHUFFLE_1: default ON +# add a test that runs the benchmark program passing "shuffle" with 1 +# thread as second parameter +# TEST_INCLUDE_BENCH_SHUFFLE_N: default ON +# add a test that runs the benchmark program passing "shuffle" with all +# threads as second parameter +# TEST_INCLUDE_BENCH_BITSHUFFLE_1: default ON +# add a test that runs the benchmark program passing "bitshuffle" with 1 +# thread as second parameter +# TEST_INCLUDE_BENCH_BITSHUFFLE_N: default ON +# add a test that runs the benchmark program passing "bitshuffle" with +# all threads as second parameter +# TEST_INCLUDE_BENCH_SUITE: default OFF +# add a test that runs the benchmark program passing "suite" +# as first parameter +# TEST_INCLUDE_BENCH_SUITE_PARALLEL: default OFF +# add a test that runs the benchmark program passing "parallel" +# as first parameter +# TEST_INCLUDE_BENCH_HARDSUITE: default OFF +# add a test that runs the benchmark program passing "hardsuite" +# as first parameter +# TEST_INCLUDE_BENCH_EXTREMESUITE: default OFF +# add a test that runs the benchmark program passing "extremesuite" +# as first parameter +# TEST_INCLUDE_BENCH_DEBUGSUITE: default OFF +# add a test that runs the benchmark program passing "debugsuite" +# as first parameter +# +# Components: +# +# LIB: includes blosc.so +# DEV: static includes blosc.a and blosc.h + + +cmake_minimum_required(VERSION 2.8.12) +if (NOT CMAKE_VERSION VERSION_LESS 3.3) + cmake_policy(SET CMP0063 NEW) +endif() +project(blosc) + +# parse the full version numbers from blosc.h +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/blosc/blosc.h _blosc_h_contents) +string(REGEX REPLACE ".*#define[ \t]+BLOSC_VERSION_MAJOR[ \t]+([0-9]+).*" + "\\1" BLOSC_VERSION_MAJOR ${_blosc_h_contents}) +string(REGEX REPLACE ".*#define[ \t]+BLOSC_VERSION_MINOR[ \t]+([0-9]+).*" + "\\1" BLOSC_VERSION_MINOR ${_blosc_h_contents}) +string(REGEX REPLACE ".*#define[ \t]+BLOSC_VERSION_RELEASE[ \t]+([0-9]+).*" + "\\1" BLOSC_VERSION_PATCH ${_blosc_h_contents}) +string(REGEX REPLACE ".*#define[ \t]+BLOSC_VERSION_STRING[ \t]+\"([-0-9A-Za-z.]+)\".*" + "\\1" BLOSC_VERSION_STRING ${_blosc_h_contents}) + 
+message("Configuring for Blosc version: " ${BLOSC_VERSION_STRING}) + +# options +option(BUILD_STATIC + "Build a static version of the blosc library." ON) +option(BUILD_SHARED + "Build a shared library version of the blosc library." ON) +option(BUILD_TESTS + "Build test programs form the blosc compression library" ON) +option(BUILD_BENCHMARKS + "Build benchmark programs form the blosc compression library" ON) +option(DEACTIVATE_AVX2 + "Do not attempt to build with AVX2 instructions" OFF) +option(DEACTIVATE_LZ4 + "Do not include support for the LZ4 library." OFF) +option(DEACTIVATE_SNAPPY + "Do not include support for the Snappy library." OFF) +option(DEACTIVATE_ZLIB + "Do not include support for the Zlib library." OFF) +option(DEACTIVATE_ZSTD + "Do not include support for the Zstd library." OFF) +option(PREFER_EXTERNAL_LZ4 + "Find and use external LZ4 library instead of included sources." OFF) +option(PREFER_EXTERNAL_SNAPPY + "Find and use external Snappy library instead of included sources." OFF) +option(PREFER_EXTERNAL_ZLIB + "Find and use external Zlib library instead of included sources." OFF) +option(PREFER_EXTERNAL_ZSTD + "Find and use external Zstd library instead of included sources." OFF) + +set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") + + +if(NOT DEACTIVATE_LZ4) + if(PREFER_EXTERNAL_LZ4) + find_package(LZ4) + else() + message(STATUS "Using LZ4 internal sources.") + endif(PREFER_EXTERNAL_LZ4) + # HAVE_LZ4 will be set to true because even if the library is + # not found, we will use the included sources for it + set(HAVE_LZ4 TRUE) +endif(NOT DEACTIVATE_LZ4) + +if(NOT DEACTIVATE_SNAPPY) + if(PREFER_EXTERNAL_SNAPPY) + find_package(Snappy) + else() + message(STATUS "Using Snappy internal sources.") + endif(PREFER_EXTERNAL_SNAPPY) + # HAVE_SNAPPY will be set to true because even if the library is not found, + # we will use the included sources for it + set(HAVE_SNAPPY TRUE) +endif(NOT DEACTIVATE_SNAPPY) + +if(NOT DEACTIVATE_ZLIB) + # import the ZLIB_ROOT environment variable to help finding the zlib library + if(PREFER_EXTERNAL_ZLIB) + set(ZLIB_ROOT $ENV{ZLIB_ROOT}) + find_package(ZLIB) + if (NOT ZLIB_FOUND ) + message(STATUS "No zlib found. Using internal sources.") + endif (NOT ZLIB_FOUND ) + else() + message(STATUS "Using zlib internal sources.") + endif(PREFER_EXTERNAL_ZLIB) + # HAVE_ZLIB will be set to true because even if the library is not found, + # we will use the included sources for it + set(HAVE_ZLIB TRUE) +endif(NOT DEACTIVATE_ZLIB) + +if (NOT DEACTIVATE_ZSTD) + if (PREFER_EXTERNAL_ZSTD) + find_package(Zstd) + else () + message(STATUS "Using ZSTD internal sources.") + endif (PREFER_EXTERNAL_ZSTD) + # HAVE_ZSTD will be set to true because even if the library is + # not found, we will use the included sources for it + set(HAVE_ZSTD TRUE) +endif (NOT DEACTIVATE_ZSTD) + +# create the config.h file +configure_file ("blosc/config.h.in" "blosc/config.h" ) + +# now make sure that you set the build directory on your "Include" path when compiling +include_directories("${PROJECT_BINARY_DIR}/blosc/") + +# If the build type is not set, default to Release. +set(BLOSC_DEFAULT_BUILD_TYPE Release) +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "No build type specified. Defaulting to '${BLOSC_DEFAULT_BUILD_TYPE}'.") + set(CMAKE_BUILD_TYPE ${BLOSC_DEFAULT_BUILD_TYPE} CACHE STRING + "Choose the type of build." 
FORCE) + + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS + "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +if(APPLE) + # enable @rpath in the install name for any shared library being built. See #175. + set(CMAKE_MACOSX_RPATH TRUE) +endif() + +# Based on the target system's processor and the compiler being used, +# set build variables indicating which hardware features can be targeted +# by the compiler. Note we DO NOT check which hardware features are supported +# by this (the host) system, because we want to be able to support compiling +# for newer hardware on older machines as well as cross-compilation. +message(STATUS "Building for system processor ${CMAKE_SYSTEM_PROCESSOR}") +if(CMAKE_SYSTEM_PROCESSOR STREQUAL i386 OR + CMAKE_SYSTEM_PROCESSOR STREQUAL i686 OR + CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR + CMAKE_SYSTEM_PROCESSOR STREQUAL amd64 OR + CMAKE_SYSTEM_PROCESSOR STREQUAL AMD64) + if(CMAKE_C_COMPILER_ID STREQUAL GNU) + # We need C99 (GNU99 more exactly) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99") + set(COMPILER_SUPPORT_SSE2 TRUE) + if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 4.7 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 4.7) + set(COMPILER_SUPPORT_AVX2 TRUE) + else() + set(COMPILER_SUPPORT_AVX2 FALSE) + endif() + elseif(CMAKE_C_COMPILER_ID STREQUAL Clang) + set(COMPILER_SUPPORT_SSE2 TRUE) + if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 3.2 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 3.2) + set(COMPILER_SUPPORT_AVX2 TRUE) + else() + set(COMPILER_SUPPORT_AVX2 FALSE) + endif() + elseif(CMAKE_C_COMPILER_ID STREQUAL Intel) + set(COMPILER_SUPPORT_SSE2 TRUE) + if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 14.0 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 14.0) + # icc (ICC) 15.0.3 does not work compiling AVX2 code + # (perhaps my machine does not have AVX2 and the compiler + # cannot generate code for that?) + set(COMPILER_SUPPORT_AVX2 FALSE) + else() + set(COMPILER_SUPPORT_AVX2 FALSE) + endif() + elseif(MSVC) + set(COMPILER_SUPPORT_SSE2 TRUE) + if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 18.00.30501 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 18.00.30501) + set(COMPILER_SUPPORT_AVX2 TRUE) + else() + set(COMPILER_SUPPORT_AVX2 FALSE) + endif() + else() + set(COMPILER_SUPPORT_SSE2 FALSE) + set(COMPILER_SUPPORT_AVX2 FALSE) + # Unrecognized compiler. Emit a warning message to let the user know hardware-acceleration won't be available. + message(WARNING "Unable to determine which ${CMAKE_SYSTEM_PROCESSOR} hardware features are supported by the C compiler (${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}).") + endif() +else() + # If the target system processor isn't recognized, emit a warning message to alert the user + # that hardware-acceleration support won't be available but allow configuration to proceed. + message(WARNING "Unrecognized system processor ${CMAKE_SYSTEM_PROCESSOR}. Cannot determine which hardware features (${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}) supports, so hardware-accelerated implementations will not be available.") +endif() + +# disable AVX2 if specified +if(DEACTIVATE_AVX2) + set(COMPILER_SUPPORT_AVX2 FALSE) +endif() + +# flags +# Set -Wall and other useful warning flags. 
+if(CMAKE_C_COMPILER_ID STREQUAL GNU OR CMAKE_C_COMPILER_ID STREQUAL Clang OR CMAKE_C_COMPILER_ID STREQUAL Intel)
+  add_compile_options(-Wall -Wwrite-strings -Wno-unused-function)
+endif(CMAKE_C_COMPILER_ID STREQUAL GNU OR CMAKE_C_COMPILER_ID STREQUAL Clang OR CMAKE_C_COMPILER_ID STREQUAL Intel)
+
+# @NOTE: -O3 is enabled in Release mode (CMAKE_BUILD_TYPE="Release")
+
+# Set the "-msse2" build flag if supported.
+if(CMAKE_C_COMPILER_ID STREQUAL GNU OR CMAKE_C_COMPILER_ID STREQUAL Clang OR CMAKE_C_COMPILER_ID STREQUAL Intel)
+  if(COMPILER_SUPPORT_SSE2)
+    add_compile_options(-msse2)
+  endif(COMPILER_SUPPORT_SSE2)
+endif(CMAKE_C_COMPILER_ID STREQUAL GNU OR CMAKE_C_COMPILER_ID STREQUAL Clang OR CMAKE_C_COMPILER_ID STREQUAL Intel)
+
+if(MSVC)
+  if(NOT CMAKE_C_FLAGS)
+    set(CMAKE_C_FLAGS "/Ox" CACHE STRING "C flags." FORCE)
+  endif(NOT CMAKE_C_FLAGS)
+
+  # Turn off misguided "secure CRT" warnings in MSVC.
+  # Microsoft wants people to use the MS-specific _s
+  # versions of certain C functions but this is difficult to do
+  # in platform-independent code.
+  add_definitions( -D_CRT_SECURE_NO_WARNINGS )
+endif(MSVC)
+
+if(WIN32)
+  # For some supporting headers
+  include_directories("${CMAKE_CURRENT_SOURCE_DIR}/blosc")
+endif(WIN32)
+
+if(HAIKU)
+  # Haiku has posix_memalign, required by test_common.h
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_POSIX_C_SOURCE=200112L")
+endif(HAIKU)
+
+if (NOT DEFINED BLOSC_IS_SUBPROJECT)
+  if ("^${CMAKE_SOURCE_DIR}$" STREQUAL "^${PROJECT_SOURCE_DIR}$")
+    set (BLOSC_IS_SUBPROJECT FALSE)
+  else ()
+    set (BLOSC_IS_SUBPROJECT TRUE)
+    message(STATUS "Detected that BLOSC is used as a subproject.")
+  endif ()
+endif ()
+
+if (NOT DEFINED BLOSC_INSTALL)
+  if (BLOSC_IS_SUBPROJECT)
+    set(BLOSC_INSTALL FALSE)
+  else()
+    set(BLOSC_INSTALL TRUE)
+  endif()
+endif()
+
+
+# subdirectories
+add_subdirectory(blosc)
+
+if(BUILD_TESTS)
+  enable_testing()
+  add_subdirectory(tests)
+  add_subdirectory(compat)
+endif(BUILD_TESTS)
+
+if(BUILD_BENCHMARKS)
+  add_subdirectory(bench)
+endif(BUILD_BENCHMARKS)
+
+
+# uninstall target
+if (BLOSC_INSTALL)
+  configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/blosc.pc.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/blosc.pc"
+    @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/blosc.pc"
+    DESTINATION lib/pkgconfig COMPONENT DEV)
+
+  configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
+    IMMEDIATE @ONLY)
+  add_custom_target(uninstall
+    COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
+endif()
+
+# packaging
+if (NOT BLOSC_IS_SUBPROJECT)
+  include(InstallRequiredSystemLibraries)
+
+  set(CPACK_GENERATOR TGZ ZIP)
+  set(CPACK_SOURCE_GENERATOR TGZ ZIP)
+  set(CPACK_PACKAGE_VERSION_MAJOR ${BLOSC_VERSION_MAJOR})
+  set(CPACK_PACKAGE_VERSION_MINOR ${BLOSC_VERSION_MINOR})
+  set(CPACK_PACKAGE_VERSION_PATCH ${BLOSC_VERSION_PATCH})
+  set(CPACK_PACKAGE_VERSION ${BLOSC_VERSION_STRING})
+  set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
+  set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
+    "A blocking, shuffling and lossless compression library")
+  set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSES/BLOSC.txt")
+  set(CPACK_SOURCE_IGNORE_FILES "/build.*;.*~;\\\\.git.*;\\\\.DS_Store")
+  set(CPACK_STRIP_FILES TRUE)
+  set(CPACK_SOURCE_STRIP_FILES TRUE)
+
+  include(CPack)
+endif()
diff --git a/c-blosc/LICENSES/BITSHUFFLE.txt b/c-blosc/LICENSES/BITSHUFFLE.txt
new file mode 100644
index 0000000..1365ed6
--- /dev/null
+++ b/c-blosc/LICENSES/BITSHUFFLE.txt
@@ -0,0
+1,21 @@ +Bitshuffle - Filter for improving compression of typed binary data. + +Copyright (c) 2014 Kiyoshi Masui (kiyo@physics.ubc.ca) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/c-blosc/LICENSES/BLOSC.txt b/c-blosc/LICENSES/BLOSC.txt new file mode 100644 index 0000000..b00eeab --- /dev/null +++ b/c-blosc/LICENSES/BLOSC.txt @@ -0,0 +1,30 @@ +BSD License + +For Blosc - A blocking, shuffling and lossless compression library + +Copyright (C) 2009-2017 Francesc Alted + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Francesc Alted nor the names of its contributors may be used + to endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/c-blosc/LICENSES/FASTLZ.txt b/c-blosc/LICENSES/FASTLZ.txt new file mode 100644 index 0000000..4a6abd6 --- /dev/null +++ b/c-blosc/LICENSES/FASTLZ.txt @@ -0,0 +1,24 @@ +FastLZ - lightning-fast lossless compression library + +Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) +Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) +Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/c-blosc/LICENSES/LZ4.txt b/c-blosc/LICENSES/LZ4.txt new file mode 100644 index 0000000..2383e10 --- /dev/null +++ b/c-blosc/LICENSES/LZ4.txt @@ -0,0 +1,32 @@ +LZ4 - Fast LZ compression algorithm + +Copyright (C) 2011-2014, Yann Collet. +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html +- LZ4 source repository : http://code.google.com/p/lz4/ + diff --git a/c-blosc/LICENSES/SNAPPY.txt b/c-blosc/LICENSES/SNAPPY.txt new file mode 100644 index 0000000..8d6bd9f --- /dev/null +++ b/c-blosc/LICENSES/SNAPPY.txt @@ -0,0 +1,28 @@ +Copyright 2011, Google Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/c-blosc/LICENSES/STDINT.txt b/c-blosc/LICENSES/STDINT.txt new file mode 100644 index 0000000..c28001d --- /dev/null +++ b/c-blosc/LICENSES/STDINT.txt @@ -0,0 +1,29 @@ +ISO C9x compliant stdint.h for Microsoft Visual Studio +Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 + + Copyright (c) 2006-2013 Alexander Chemeris + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the product nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/c-blosc/LICENSES/ZLIB.txt b/c-blosc/LICENSES/ZLIB.txt new file mode 100644 index 0000000..5d74f5c --- /dev/null +++ b/c-blosc/LICENSES/ZLIB.txt @@ -0,0 +1,22 @@ +Copyright notice: + + (C) 1995-2013 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. 
In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Jean-loup Gailly Mark Adler
  jloup@gzip.org madler@alumni.caltech.edu
diff --git a/c-blosc/README.md b/c-blosc/README.md
new file mode 100644
index 0000000..894fa0a
--- /dev/null
+++ b/c-blosc/README.md
@@ -0,0 +1,196 @@
+# Blosc: A blocking, shuffling and lossless compression library
+| Author | Contact | URL |
+|--------|---------|-----|
+| Francesc Alted | francesc@blosc.org | http://www.blosc.org |
+
+| Gitter | Travis CI | Appveyor |
+|--------|-----------|----------|
+| [![Build Status](https://badges.gitter.im/Blosc/c-blosc.svg)](https://gitter.im/Blosc/c-blosc?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [![Build Status](https://travis-ci.org/Blosc/c-blosc.svg?branch=master)](https://travis-ci.org/Blosc/c-blosc) | [![Build Status](https://ci.appveyor.com/api/projects/status/3mlyjc1ak0lbkmte?svg=true)](https://ci.appveyor.com/project/FrancescAlted/c-blosc/branch/master) |
+
+
+## What is it?
+
+Blosc is a high performance compressor optimized for binary data.
+It has been designed to transmit data to the processor cache faster
+than the traditional, non-compressed, direct memory fetch approach via
+a plain memcpy() call. Blosc is the first compressor (that I'm aware of)
+that is meant not only to reduce the size of large datasets on-disk or
+in-memory, but also to accelerate memory-bound computations.
+
+It uses the [blocking technique](http://blosc.org/docs/StarvingCPUs-CISE-2010.pdf)
+to reduce activity in the memory bus as much as possible. In short, this
+technique works by dividing datasets into blocks that are small enough
+to fit in the caches of modern processors, and performing compression /
+decompression there. It also leverages SIMD instructions (SSE2, AVX2),
+when available, and the multi-threading capabilities of CPUs in order
+to accelerate the compression / decompression process as much as
+possible.
+
+See some [benchmarks](http://blosc.org/pages/synthetic-benchmarks/) about Blosc performance.
+
+Blosc is distributed using the BSD license, see LICENSES/BLOSC.txt for
+details.
+
+## Meta-compression and other differences over existing compressors
+
+C-Blosc is not like other compressors: it should rather be called a
+meta-compressor. This is because it can use different compressors
+and filters (programs that generally improve compression ratio). At
+any rate, it can also be called a compressor because it already comes
+with several compressors and filters, so it can actually work like a
+regular codec.
+
+Currently C-Blosc comes with support for BloscLZ, a compressor heavily
+based on FastLZ (http://fastlz.org/), LZ4 and LZ4HC
+(https://github.com/Cyan4973/lz4), Snappy
+(https://github.com/google/snappy), Zlib (http://www.zlib.net/) and
+Zstd (http://www.zstd.net).
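+
+As a quick illustration of codec selection, here is a minimal sketch
+using the public C API (the codec list printed depends on how your
+copy of the library was built):
+
+```c
+#include <stdio.h>
+#include "blosc.h"
+
+int main(void) {
+  blosc_init();
+
+  /* Comma-separated list of the codecs compiled into this build,
+     e.g. "blosclz,lz4,lz4hc,snappy,zlib,zstd". */
+  printf("Codecs in this build: %s\n", blosc_list_compressors());
+
+  /* Select the codec used by subsequent blosc_compress() calls;
+     blosc_set_compressor() returns a negative value if the codec
+     is not available. */
+  if (blosc_set_compressor("lz4") < 0)
+    printf("LZ4 support was not compiled in\n");
+
+  blosc_destroy();
+  return 0;
+}
+```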
+
+C-Blosc also comes with highly optimized shuffle and bitshuffle filters,
+which can use SSE2 or AVX2 instructions if available
+(for info on how and why shuffling works, [see here](https://www.slideshare.net/PyData/blosc-py-data-2014/17?src=clipshare)).
+Additional compressors or filters may be added in the future.
+
+Blosc is in charge of coordinating the different compressors and
+filters so that they can leverage the
+[blocking technique](http://blosc.org/docs/StarvingCPUs-CISE-2010.pdf)
+as well as multi-threaded execution (if several cores are
+available) automatically. As a result, every codec and filter
+works at very high speed, even if it was not initially designed
+for blocking or multi-threading.
+
+Finally, C-Blosc is especially well suited to dealing with binary data
+because it can take advantage of the type size meta-information for
+improved compression ratios by using the integrated shuffle and
+bitshuffle filters.
+
+When taken together, all these features set Blosc apart from other
+compression libraries.
+
+## Compiling the Blosc library
+
+Blosc can be built, tested and installed using [CMake](http://www.cmake.org).
+The following procedure describes the "out of source" build.
+
+```console
+
+  $ cd c-blosc
+  $ mkdir build
+  $ cd build
+```
+
+Now run the CMake configuration and optionally specify the installation
+directory (e.g. '/usr' or '/usr/local'):
+
+```console
+
+  $ cmake -DCMAKE_INSTALL_PREFIX=your_install_prefix_directory ..
+```
+
+CMake lets you configure Blosc in many different ways, such as preferring
+internal or external sources for compressors, or enabling/disabling
+them. Please note that configuration can also be performed using the UI
+tools provided by [CMake](http://www.cmake.org) (ccmake or cmake-gui):
+
+```console
+
+  $ ccmake ..      # run a curses-based interface
+  $ cmake-gui ..   # run a graphical interface
+```
+
+Build, test and install Blosc:
+
+
+```console
+
+  $ cmake --build .
+  $ ctest
+  $ cmake --build . --target install
+```
+
+The static and dynamic versions of the Blosc library, together with
+header files, will be installed into the specified
+CMAKE_INSTALL_PREFIX.
+
+### Codec support with CMake
+
+C-Blosc comes with full sources for LZ4, LZ4HC, Snappy, Zlib and Zstd,
+so in general you should not worry about not having (or CMake
+not finding) these libraries on your system: by default the
+included sources are automatically compiled and included in the
+C-Blosc library. This effectively means that you can count on
+complete support for all the codecs in every Blosc deployment
+(unless you explicitly exclude support for some of them).
+
+But in case you want to force Blosc to use external codec libraries instead of
+the included sources, you can do that:
+
+```console
+
+  $ cmake -DPREFER_EXTERNAL_ZSTD=ON ..
+```
+
+You can also disable support for some compression libraries:
+
+
+```console
+
+  $ cmake -DDEACTIVATE_SNAPPY=ON ..  # in case you don't have a C++ compiler
+```
+
+## Examples
+
+In the [examples/ directory](https://github.com/Blosc/c-blosc/tree/master/examples)
+you can find hints on how to use Blosc inside your app.
+
+## Supported platforms
+
+Blosc is meant to support all platforms where a C89 compliant C
+compiler can be found. The most tested ones are Intel
+(Linux, Mac OSX and Windows) and ARM (Linux), but exotic ones such as
+the IBM Blue Gene Q embedded "A2" processor have been reported to work too.
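+
+As a quick sanity check after building and installing, the following
+minimal roundtrip (a sketch in the spirit of examples/simple.c; the
+buffer sizes and parameters here are just illustrative) can be compiled
+with something like `gcc roundtrip.c -o roundtrip -lblosc`:
+
+```c
+#include <stdio.h>
+#include <string.h>
+#include "blosc.h"
+
+#define N (1000 * 1000)
+
+int main(void) {
+  /* static, so the multi-MB buffers do not live on the stack */
+  static float data[N], data_out[N];
+  static char compressed[sizeof(data) + BLOSC_MAX_OVERHEAD];
+  int i, csize, dsize;
+
+  for (i = 0; i < N; i++) data[i] = (float)i;
+
+  blosc_init();
+  blosc_set_nthreads(4);  /* e.g. the number of cores */
+
+  /* clevel=9, byte shuffle, typesize=sizeof(float); the destination
+     must be at least nbytes + BLOSC_MAX_OVERHEAD bytes long */
+  csize = blosc_compress(9, BLOSC_SHUFFLE, sizeof(float), sizeof(data),
+                         data, compressed, sizeof(compressed));
+  if (csize <= 0) {
+    printf("compression failed with code %d\n", csize);
+    return 1;
+  }
+  dsize = blosc_decompress(compressed, data_out, sizeof(data_out));
+  blosc_destroy();
+
+  printf("%lu -> %d bytes, roundtrip %s\n",
+         (unsigned long)sizeof(data), csize,
+         (dsize == (int)sizeof(data) &&
+          memcmp(data, data_out, sizeof(data)) == 0) ? "OK" : "FAILED");
+  return 0;
+}
+```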
+
+### Mac OSX troubleshooting
+
+If you run into compilation troubles when using Mac OSX, please make
+sure that you have installed the command line developer tools. You
+can always install them with:
+
+```console
+
+  $ xcode-select --install
+```
+
+## Wrapper for Python
+
+Blosc has an official wrapper for Python. See:
+
+https://github.com/Blosc/python-blosc
+
+## Command line interface and serialization format for Blosc
+
+Blosc can be used from the command line by using Bloscpack. See:
+
+https://github.com/Blosc/bloscpack
+
+## Filter for HDF5
+
+For those who want to use Blosc as a filter in the HDF5 library,
+there is a sample implementation in the hdf5-blosc project in:
+
+https://github.com/Blosc/hdf5-blosc
+
+## Mailing list
+
+There is an official mailing list for Blosc at:
+
+blosc@googlegroups.com
+http://groups.google.es/group/blosc
+
+## Acknowledgments
+
+See THANKS.rst.
+
+
+----
+
+  **Enjoy data!**
diff --git a/c-blosc/README_HEADER.rst b/c-blosc/README_HEADER.rst
new file mode 100644
index 0000000..0f97804
--- /dev/null
+++ b/c-blosc/README_HEADER.rst
@@ -0,0 +1,65 @@
+Blosc Header Format
+===================
+
+Blosc (as of Version 1.0.0) has the following 16 byte header that stores
+information about the compressed buffer::
+
+  |-0-|-1-|-2-|-3-|-4-|-5-|-6-|-7-|-8-|-9-|-A-|-B-|-C-|-D-|-E-|-F-|
+    ^   ^   ^   ^ |     nbytes    |   blocksize   |    ctbytes    |
+    |   |   |   |
+    |   |   |   +--typesize
+    |   |   +------flags
+    |   +----------versionlz
+    +--------------version
+
+Datatypes of the Header Entries
+-------------------------------
+
+All entries are little endian.
+
+:version:
+    (``uint8``) Blosc format version.
+:versionlz:
+    (``uint8``) Version of the internal compressor used.
+:flags and compressor enumeration:
+    (``bitfield``) The flags of the buffer
+
+    :bit 0 (``0x01``):
+        Whether the byte-shuffle filter has been applied or not.
+    :bit 1 (``0x02``):
+        Whether the internal buffer is a pure memcpy or not.
+    :bit 2 (``0x04``):
+        Whether the bit-shuffle filter has been applied or not.
+    :bit 3 (``0x08``):
+        Reserved, must be zero.
+    :bit 4 (``0x10``):
+        If set, the blocks will not be split in sub-blocks during
+        compression.
+    :bit 5 (``0x20``):
+        Part of the enumeration for compressors.
+    :bit 6 (``0x40``):
+        Part of the enumeration for compressors.
+    :bit 7 (``0x80``):
+        Part of the enumeration for compressors.
+
+    The last three bits form an enumeration that allows the use of
+    alternative compressors.
+
+    :``0``:
+        ``blosclz``
+    :``1``:
+        ``lz4`` or ``lz4hc``
+    :``2``:
+        ``snappy``
+    :``3``:
+        ``zlib``
+    :``4``:
+        ``zstd``
+
+:typesize:
+    (``uint8``) Number of bytes for the atomic type.
+:nbytes:
+    (``uint32``) Uncompressed size of the buffer.
+:blocksize:
+    (``uint32``) Size of internal blocks.
+:ctbytes:
+    (``uint32``) Compressed size of the buffer.
diff --git a/c-blosc/README_THREADED.rst b/c-blosc/README_THREADED.rst
new file mode 100644
index 0000000..4d427f9
--- /dev/null
+++ b/c-blosc/README_THREADED.rst
@@ -0,0 +1,33 @@
+Blosc supports threading
+========================
+
+Threads are the most efficient way to program parallel code for
+multi-core processors, but also the most difficult to program well.
+Also, they have a non-negligible start-up time that does not fit well
+with a high-performance compressor such as Blosc tries to be.
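+
+As a practical illustration of the header fields documented in
+README_HEADER.rst above, here is a small sketch (illustrative only)
+that uses the public ``blosc_cbuffer_*`` introspection calls rather
+than parsing the 16 bytes by hand::
+
+    #include <stdio.h>
+    #include "blosc.h"
+
+    /* Print the metadata stored in the 16-byte header of a
+       Blosc-compressed buffer. */
+    static void print_header_info(const void *cbuffer) {
+      size_t nbytes, cbytes, blocksize, typesize;
+      int flags, version, versionlz;
+
+      blosc_cbuffer_sizes(cbuffer, &nbytes, &cbytes, &blocksize);
+      blosc_cbuffer_metainfo(cbuffer, &typesize, &flags);
+      blosc_cbuffer_versions(cbuffer, &version, &versionlz);
+
+      printf("version: %d, versionlz: %d\n", version, versionlz);
+      printf("typesize: %lu, nbytes: %lu, blocksize: %lu, ctbytes: %lu\n",
+             (unsigned long)typesize, (unsigned long)nbytes,
+             (unsigned long)blocksize, (unsigned long)cbytes);
+      /* flags bits as documented above: 0x01 byte-shuffle,
+         0x02 pure memcpy, 0x04 bit-shuffle */
+      printf("byte-shuffle: %d, memcpy: %d, bit-shuffle: %d\n",
+             (flags & 0x01) ? 1 : 0, (flags & 0x02) ? 1 : 0,
+             (flags & 0x04) ? 1 : 0);
+    }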
+
+In order to reduce the overhead of threads as much as possible, I've
+decided to implement a pool of threads (the workers) that are waiting
+for the main process (the master) to send them jobs (basically,
+compressing and decompressing small blocks of the initial buffer).
+
+Despite this and many other internal optimizations in the threaded
+code, it does not work faster than the serial version for buffer sizes
+around 64/128 KB or less. This is for an Intel Quad Core2 (Q8400 @ 2.66
+GHz) / Linux (openSUSE 11.2, 64 bit) setup, but your mileage may vary
+(and will vary!) for other processors / operating systems.
+
+In contrast, for buffers larger than 64/128 KB, the threaded version
+starts to perform significantly better, with the sweet spot at 1 MB
+(again, this is with my setup). For buffer sizes larger than 1 MB,
+the threaded code slows down again, but this is probably due to a cache
+size issue and, besides, it is still considerably faster than the
+serial code.
+
+This is why Blosc falls back to the serial version for such 'small'
+buffers. So, you don't have to worry too much about deciding whether
+you should set the number of threads to 1 (serial) or more (parallel).
+Just set it to the number of cores in your processor and you are done!
+
+Francesc Alted
diff --git a/c-blosc/RELEASE_NOTES.rst b/c-blosc/RELEASE_NOTES.rst
new file mode 100644
index 0000000..29587e3
--- /dev/null
+++ b/c-blosc/RELEASE_NOTES.rst
@@ -0,0 +1,965 @@
+===========================
+ Release notes for C-Blosc
+===========================
+
+:Author: Francesc Alted
+:Contact: francesc@blosc.org
+:URL: http://www.blosc.org
+
+
+Changes from 1.14.2 to 1.14.3
+=============================
+
+- Use win32/pthread.c on all Windows builds, even those with GNU compilers.
+  Rationale: although MinGW provides a more full-featured pthreads
+  replacement, it doesn't seem to accomplish anything here since the
+  functionality in win32/pthread.c is sufficient for Blosc. Furthermore,
+  using the MinGW pthreads adds an additional library dependency to libblosc
+  that is annoying for binary distribution. For example, it got in the way
+  of distributing cross-compiled Windows binaries for use with Julia, since
+  they want the resulting libblosc.dll to be usable on any Windows machine,
+  even where MinGW is not installed. See PR #224. Thanks to Steven G.
+  Johnson.
+
+- Zstd internal sources have been updated to 1.3.4.
+
+
+Changes from 1.14.1 to 1.14.2
+=============================
+
+- Reverted the $Configuration var in the CMake configuration for Windows
+  so as to restore compatibility with MS VisualStudio compilers.
+
+
+Changes from 1.14.0 to 1.14.1
+=============================
+
+- Fixed a bug that caused C-Blosc to crash on platforms requiring strict
+  alignment (as in some kinds of ARM CPUs). Fixes #223. Thanks to Elvis
+  Stansvik and Michael Hudson-Doyle for their help.
+
+- Fixed a piece of code that was not C89 compliant. C89 compliance is
+  needed mainly by MS VS2008, which is still used for creating Python 2
+  extensions.
+
+- Removed the (spurious) $Configuration var in the cmake config for
+  Windows. Thanks to Francis Brissette for pointing this out.
+
+
+Changes from 1.13.7 to 1.14.0
+=============================
+
+- New split mode that favors forward compatibility. That means that,
+  from now on, all the buffers created starting with blosc 1.14.0 will
+  be forward compatible with any previous version of the library --at
+  least until 1.3.0, when support for multi-codec was introduced.
+
+  To select the split mode, a new API function has been introduced:
+  https://github.com/Blosc/c-blosc/blob/master/blosc/blosc.h#L500
+  Also, the BLOSC_SPLITMODE environment variable is honored when using
+  the `blosc_compress()` function. See
+  https://github.com/Blosc/c-blosc/blob/master/blosc/blosc.h#L209
+
+  There is a dedicated blog entry about this at:
+  http://blosc.org/posts/new-forward-compat-policy/
+  More info in PR #216.
+
+  Caveat Emptor: Note that Blosc versions from 1.11.0 to 1.14.0 *might*
+  generate buffers that cannot be read with versions < 1.11.0, so if
+  forward compatibility is important to you, an upgrade to 1.14.0 is
+  recommended.
+
+- All warnings during the cmake build stage are enabled by default now.
+  PR #218. Thanks to kalvdans.
+
+- Better checks on versions of formats inside Blosc. PR #219. Thanks
+  to kalvdans.
+
+- The BLOSC_PRINT_SHUFFLE_ACCEL environment variable is honored now.
+  This is useful for determining *at runtime* whether the different SIMD
+  capabilities (only for x86-class processors) are available to Blosc to
+  get better performance during the shuffle/bitshuffle operation. As an
+  example, here is the normal output for the simple.c example::
+
+    $ ./simple
+    Blosc version info: 1.14.0.dev ($Date:: 2018-02-15 #$)
+    Compression: 4000000 -> 41384 (96.7x)
+    Decompression succesful!
+    Succesful roundtrip!
+
+  and here with the BLOSC_PRINT_SHUFFLE_ACCEL environment variable set::
+
+    $ BLOSC_PRINT_SHUFFLE_ACCEL= ./simple
+    Blosc version info: 1.14.0.dev ($Date:: 2018-02-15 #$)
+    Shuffle CPU Information:
+    SSE2 available: True
+    SSE3 available: True
+    SSSE3 available: True
+    SSE4.1 available: True
+    SSE4.2 available: True
+    AVX2 available: True
+    AVX512BW available: False
+    XSAVE available: True
+    XSAVE enabled: True
+    XMM state enabled: True
+    YMM state enabled: True
+    ZMM state enabled: False
+    Compression: 4000000 -> 41384 (96.7x)
+    Decompression succesful!
+    Succesful roundtrip!
+
+  Blosc currently only leverages the SSE2 and AVX2 instruction sets, but
+  it can recognize all of the above. This is useful mainly for debugging.
+
+
+Changes from 1.13.6 to 1.13.7
+=============================
+
+- More tests for binaries in https://bintray.com/blosc/Conan.
+
+
+Changes from 1.13.5 to 1.13.6
+=============================
+
+- More tests for binaries in https://bintray.com/blosc/Conan.
+
+
+Changes from 1.13.4 to 1.13.5
+=============================
+
+- New conan binaries publicly accessible in https://bintray.com/blosc/Conan.
+  Still experimental, but feedback is appreciated.
+
+
+Changes from 1.13.3 to 1.13.4
+=============================
+
+- Fixed a buffer overrun that happens when compressing small buffers and
+  len(destination_buffer) < (len(source_buffer) + BLOSC_MAX_OVERHEAD).
+  Reported by Ivan Smirnov.
+
+
+Changes from 1.13.2 to 1.13.3
+=============================
+
+- Tests work now when external compressors are located in non-system
+  locations. Fixes #210. Thanks to Leif Walsh.
+
+
+Changes from 1.13.1 to 1.13.2
+=============================
+
+- C-Blosc can be compiled on CentOS 6 now.
+
+- LZ4 internal codec upgraded to 1.8.1.
+
+
+Changes from 1.13.0 to 1.13.1
+=============================
+
+- Fixed a bug uncovered by the python-blosc test suite: when a buffer is
+  to be copied, space should be reserved for the header, not for block
+  pointers.
+
+
+Changes from 1.12.1 to 1.13.0
+=============================
+
+- Serious optimization of the memory copy functions (see the new
+  `blosc/fastcopy.c`).
+  This benefits the speed of all the codecs, but especially BloscLZ.
+
+- As a result of the above, the BloscLZ codec received a new adjustment of
+  knobs, so that you should expect better compression ratios with it too.
+
+- LZ4 internal sources have been updated to 1.8.0.
+
+- Zstd internal sources have been updated to 1.3.3.
+
+
+Changes from 1.12.0 to 1.12.1
+=============================
+
+- Backported BloscLZ parameters that were fine-tuned for C-Blosc2.
+  You should expect better compression ratios and faster operation,
+  especially on modern CPUs. See:
+  http://blosc.org/posts/blosclz-tuning/
+
+
+Changes from 1.11.3 to 1.12.0
+=============================
+
+- Snappy, Zlib and Zstd codecs are compiled internally now, even if they
+  are installed on the machine. This has been done in order to avoid
+  problems on machines having the shared libraries for the codecs
+  accessible but not the includes (typical in Windows boxes). Also,
+  the Zstd codec runs much faster when compiled internally. The
+  previous behaviour can be restored by activating the cmake options
+  PREFER_EXTERNAL_SNAPPY, PREFER_EXTERNAL_ZLIB and PREFER_EXTERNAL_ZSTD.
+
+- Zstd internal sources have been updated to 1.3.0.
+
+
+Changes from 1.11.3 to 1.11.4
+=============================
+
+- Internal Zstd codec updated to 1.1.4.
+
+
+Changes from 1.11.2 to 1.11.3
+=============================
+
+- Fixed #181: bitshuffle filter for big endian machines.
+
+- Internal Zstd codec updated to 1.1.3.
+
+- New blocksize for complevel 8 in automatic mode. This should help
+  especially the Zstd codec to achieve better compression ratios.
+
+
+Changes from 1.11.1 to 1.11.2
+=============================
+
+- Enabled use as a CMake subproject, exporting shared & static library
+  targets for super-projects to use. See PRs #178, #179 and #180.
+  Thanks to Kevin Murray.
+
+- Internal LZ4 codec updated to 1.7.5.
+
+- Internal Zstd codec updated to 1.1.2.
+
+
+Changes from 1.11.0 to 1.11.1
+=============================
+
+- Fixed a bug introduced in 1.11.0 and discovered by the pandas test
+  suite. This basically prevented decompressing buffers compressed with
+  previous versions of C-Blosc. See:
+  https://github.com/Blosc/python-blosc/issues/115
+
+
+Changes from 1.10.2 to 1.11.0
+=============================
+
+- Internal Zstd codec upgraded to 1.0.0.
+
+- New block size computation inherited from C-Blosc2. Benchmarks show
+  that this mainly benefits the LZ4, LZ4HC, Zlib and Zstd codecs, both in
+  speed and in compression ratios (although YMMV for your case).
+
+- Added the @rpath flag in Mac OSX for shared libraries. Fixes #175.
+
+- Added a fix for VS2008 discovered in: https://github.com/PyTables/PyTables/pull/569/files#diff-953cf824ebfea7208d2a2e312d9ccda2L126
+
+- License changed from MIT to 3-clause BSD style.
+
+
+Changes from 1.10.1 to 1.10.2
+=============================
+
+- Force the use of --std=gnu99 when using gcc. Fixes #174.
+
+
+Changes from 1.10.0 to 1.10.1
+=============================
+
+- Removed an inconsistent check for C11 (__STDC_VERSION__ >= 201112L and
+  _ISOC11_SOURCE) as this seems to pose problems with compilers doing
+  different things in this check (e.g. clang). See
+  https://github.com/Blosc/bloscpack/issues/50.
+
+
+Changes from 1.9.3 to 1.10.0
+============================
+
+- Initial support for Zstandard (0.7.4).
Zstandard (or Zstd for short) is a new
+  compression library that allows better compression than Zlib, but
+  typically works faster (and sometimes much faster), making it a good
+  match for Blosc.
+
+  Although the Zstd format is considered stable
+  (http://fastcompression.blogspot.com.es/2016_07_03_archive.html), its API
+  is maturing very fast, and despite passing the extreme test suite for
+  C-Blosc, this codec should be considered in beta for C-Blosc usage
+  purposes. Please test it and report back any possible issues you may get.
+
+
+Changes from 1.9.2 to 1.9.3
+===========================
+
+- Reverted a mistake introduced in 1.7.1. At that time, bit-shuffling
+  was enabled for typesize == 1 (i.e. strings), but the change also
+  included byte-shuffling accidentally. This only affected performance,
+  but in a quite bad way (a copy was needed). This has been fixed and
+  byte-shuffling is not active when typesize == 1 anymore.
+
+
+Changes from 1.9.1 to 1.9.2
+===========================
+
+- Check whether Blosc is actually initialized before blosc_init(),
+  blosc_destroy() and blosc_free_resources(). This makes the library
+  more resistant to different initialization cycles
+  (e.g. https://github.com/stevengj/Blosc.jl/issues/19).
+
+
+Changes from 1.9.0 to 1.9.1
+===========================
+
+- The internal copies when clevel=0 are now made via memcpy(). At the
+  beginning of C-Blosc development, benchmarks were saying that the
+  internal, multi-threaded copies inside C-Blosc were faster than
+  memcpy(), but 6 years later memcpy() has made great strides in terms
+  of efficiency. With this, you should expect a slight speed
+  advantage (10% ~ 20%) when C-Blosc is used as a replacement for
+  memcpy() (which should not be the most common scenario out there).
+
+- Added a new DEACTIVATE_AVX2 cmake option to explicitly disable AVX2
+  at build-time. Thanks to James Bird.
+
+- The ``make -jN`` for parallel compilation should work now. Thanks
+  to James Bird.
+
+
+Changes from 1.8.1 to 1.9.0
+===========================
+
+* New blosc_get_nthreads() function to get the number of threads that
+  will be used internally during compression/decompression (set by the
+  already existing blosc_set_nthreads()).
+
+* New blosc_get_compressor() function to get the compressor that will
+  be used internally during compression (set by the already existing
+  blosc_set_compressor()).
+
+* New blosc_get_blocksize() function to get the internal blocksize to
+  be used during compression (set by the already existing
+  blosc_set_blocksize()).
+
+* Now, when the BLOSC_NOLOCK environment variable is set (to any
+  value), the calls to blosc_compress() and blosc_decompress() will
+  call blosc_compress_ctx() and blosc_decompress_ctx() under the hood
+  so as to avoid the internal locks. See blosc.h for details. This
+  allows multi-threaded apps calling the non _ctx() functions to avoid
+  the internal locks in C-Blosc. For non-multi-threaded apps, though,
+  it is generally slower to call the _ctx() functions, so the use of
+  BLOSC_NOLOCK is discouraged.
+
+* In the same vein, from now on, when the BLOSC_NTHREADS environment
+  variable is set to an integer, every call to blosc_compress() and
+  blosc_decompress() will call blosc_set_nthreads(BLOSC_NTHREADS)
+  before the actual compression/decompression process. See blosc.h
+  for details.
+
+* Finally, if the BLOSC_CLEVEL, BLOSC_SHUFFLE, BLOSC_TYPESIZE and/or
+  BLOSC_COMPRESSOR variables are set in the environment, these will
+  also be honored before calling blosc_compress().
+
+* Calling blosc_init() before any other Blosc call, although
+  recommended, is not necessary anymore. The idea is that you can use
+  just the basic blosc_compress() and blosc_decompress() and control
+  other parameters (nthreads, compressor, blocksize) by using
+  environment variables (see above).
+
+
+Changes from 1.8.0 to 1.8.1
+===========================
+
+* Disable the use of __builtin_cpu_supports() for GCC 5.3.1
+  compatibility. Details in:
+  https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/ZM2L65WIZEEQHHLFERZYD5FAG7QY2OGB/
+
+
+Changes from 1.7.1 to 1.8.0
+===========================
+
+* The code is (again) compatible with VS2008 and VS2010. This is
+  important for compatibility with Python 2.6/2.7/3.3/3.4.
+
+* Introduced a new global lock during the blosc_decompress() operation.
+  As blosc_compress() was already guarded by a global lock, this
+  means that compression/decompression is again thread safe.
+  However, when using C-Blosc from multi-threaded environments, it is
+  important to keep using the *_ctx() functions for performance
+  reasons. NOTE: the _ctx() functions will be replaced by more powerful
+  ones in C-Blosc 2.0.
+
+
+Changes from 1.7.0 to 1.7.1
+===========================
+
+* Fixed a bug preventing bitshuffle from working correctly on getitem().
+  Now, everything with bitshuffle seems to work correctly.
+
+* Fixed the thread initialization for blosc_decompress_ctx(). Issue
+  #158. Thanks to Chris Webers.
+
+* Fixed a bug in the blocksize computation introduced in 1.7.0. This
+  could have caused segfaults.
+
+* Allow bitshuffle to run on 1-byte typesizes.
+
+* New parametrization of the blocksize to be independent of the
+  typesize. This allows a smoother speed throughout all typesizes.
+
+* lz4 and lz4hc codecs upgraded to 1.7.2 (from 1.7.0).
+
+* Calling set_nthreads() without actually changing the number of
+  threads in the internal pool no longer tears it down and sets it up
+  again. PR #153. Thanks to Santi Villalba.
+
+
+Changes from 1.6.1 to 1.7.0
+===========================
+
+* Added a new 'bitshuffle' filter so that the shuffle takes place at the
+  bit level and not just at the byte level, which is what the previous
+  'shuffle' filter does.
+
+  For activating this new bit-level filter you only have to pass the
+  symbol BLOSC_BITSHUFFLE to `blosc_compress()`. For the previous
+  byte-level one, pass BLOSC_SHUFFLE. For disabling the shuffle, pass
+  BLOSC_NOSHUFFLE.
+
+  This is a port of the existing filter in
+  https://github.com/kiyo-masui/bitshuffle. Thanks to Kiyo Masui for
+  changing the license and allowing its inclusion here.
+
+* New acceleration mode for the LZ4 and BloscLZ codecs that comes into
+  operation with complevel < 9. This allows for an important boost in
+  speed with minimal compression ratio loss. Francesc Alted.
+
+* LZ4 codec updated to 1.7.0 (r130).
+
+* The PREFER_EXTERNAL_COMPLIBS cmake option has been removed and replaced
+  by the more fine grained PREFER_EXTERNAL_LZ4, PREFER_EXTERNAL_SNAPPY
+  and PREFER_EXTERNAL_ZLIB. In order to allow the use of the new API
+  introduced in LZ4 1.7.0, PREFER_EXTERNAL_LZ4 has been set to OFF by
+  default, whereas PREFER_EXTERNAL_SNAPPY and PREFER_EXTERNAL_ZLIB
+  continue to be ON.
+
+* Implemented SSE2 shuffle support for buffers containing a number of
+  elements which is not a multiple of (typesize * vectorsize). Jack
+  Pappas.
+
+* Added SSE2 shuffle/unshuffle routines for types larger than 16
+  bytes. Jack Pappas.
+
+* The 'test_basic' suite has been split into components for much better
+  granularity on what's possibly a failing test. Also, lots of new
+  tests have been added. Jack Pappas.
+
+* Fixed compilation on non-Intel archs (tested on ARM). Zbyszek
+  Szmek.
+
+* Modified cmake files in order to inform that AVX2 on Visual Studio
+  is supported only in 2013 Update 2 and higher.
+
+* Added a replacement for stdbool.h for Visual Studio < 2013.
+
+* The blosclz codec adds Win64/Intel as a platform supporting unaligned
+  addressing. That leads to a speed-up of 2.2x in decompression.
+
+* New blosc_get_version_string() function for retrieving the version
+  of the c-blosc library. Useful when linking with dynamic libraries
+  and one wants to know their version.
+
+* New example (win-dynamic-linking.c) that shows how to link a Blosc
+  DLL dynamically at run-time (Windows only).
+
+* The `context.threads_started` is now initialized when decompressing.
+  Previously this could cause crashes in case you decompressed before
+  compressing (e.g. directly deserializing blosc buffers). @atchouprakov.
+
+* The HDF5 filter has been removed from c-blosc and moved into its own
+  repo at: https://github.com/Blosc/hdf5
+
+* MS Visual Studio 2008 has been tested with c-blosc for ensuring
+  compatibility with extensions for Python 2.6 and up.
+
+
+Changes from 1.6.0 to 1.6.1
+===========================
+
+* Support for *runtime* detection of AVX2 and SSE2 SIMD instructions.
+  These changes make it possible to compile one single binary that
+  runs on a system that supports SSE2 or AVX2 (or neither), so the
+  redistribution problem is fixed (see #101). Thanks to Julian Taylor
+  and Jack Pappas.
+
+* Added support for the MinGW and TDM-GCC compilers for Windows. Thanks
+  to yasushima-gd.
+
+* Fixed a bug in blosclz that could potentially overwrite an area
+  beyond the output buffer. See #113.
+
+* New computation for blocksize so that larger typesizes (> 8 bytes)
+  benefit from much better compression ratios. Speed is not
+  penalized too much.
+
+* New parametrization of the hash table for the blosclz codec. This
+  allows better compression in many scenarios, while slightly
+  increasing the speed.
+
+
+Changes from 1.5.4 to 1.6.0
+===========================
+
+* Support for AVX2 is here! The benchmarks with a 4-core Intel
+  Haswell machine tell that both compression and decompression are
+  accelerated by around 10%, reaching peaks of 9.6 GB/s during
+  compression and 26 GB/s during decompression (memcpy() speed for
+  this machine is 7.5 GB/s for writes and 11.7 GB/s for reads). Many
+  thanks to @littlezhou for this nice work.
+
+* Support for HPET (high precision timers) for the `bench` program.
+  This is particularly important for microbenchmarks like the ones bench
+  is doing; since they take so little time to run, the granularity of a
+  less-accurate timer may account for a significant portion of the
+  runtime of the benchmark itself, skewing the results. Thanks to
+  Jack Pappas.
+
+
+Changes from 1.5.3 to 1.5.4
+===========================
+
+* Updated to LZ4 1.6.0 (r128).
+
+* Fix resource leak in t_blosc. Jack Pappas.
+
+* Better checks during testing. Jack Pappas.
+
+* Dynamically loadable HDF5 filter plugin. Kiyo Masui.
+
+
+Changes from 1.5.2 to 1.5.3
+===========================
+
+* Use the llabs function (where available) instead of abs to avoid
+  truncating the result. Jack Pappas.
+
+* Use C11 aligned_alloc when it's available. Jack Pappas.
+
+* Use the built-in stdint.h with MSVC when available. Jack Pappas.
+
+* Only define the __SSE2__ symbol when compiling with MS Visual C++
+  and targeting x64 or x86 with the correct /arch flag set. This
+  avoids re-defining the symbol, which makes other compilers issue
+  warnings. Jack Pappas.
+
+* Reinitialize Blosc during a call to set_nthreads() so as to fix
+  problems with contexts. Francesc Alted.
+
+
+
+Changes from 1.5.1 to 1.5.2
+===========================
+
+* Using blosc_compress_ctx() / blosc_decompress_ctx() inside the HDF5
+  compressor to allow operation in multiprocess scenarios. See:
+  https://github.com/PyTables/PyTables/issues/412
+
+  The drawback of this quick fix is that the Blosc filter will only be
+  able to use a single thread until another solution can be devised.
+
+
+Changes from 1.5.0 to 1.5.1
+===========================
+
+* Updated to LZ4 1.5.0. Closes #74.
+
+* Added the 'const' qualifier to non-SSE2 shuffle functions. Closes #75.
+
+* Explicitly call blosc_init() in the HDF5 blosc_filter.c, fixing a
+  segfault.
+
+* Quite a few improvements in the cmake files for HDF5 support. Thanks
+  to Dana Robinson (The HDF Group).
+
+* The variable 'class' caused problems compiling the HDF5 filter with
+  g++. Thanks to Laurent Chapon.
+
+* Small improvements to the docstrings of the c-blosc main functions.
+
+
+Changes from 1.4.1 to 1.5.0
+===========================
+
+* Added new calls for allowing Blosc to be used *simultaneously*
+  (i.e. lock free) from multi-threaded environments. The new
+  functions are:
+
+  - blosc_compress_ctx(...)
+  - blosc_decompress_ctx(...)
+
+  See the new docstrings in blosc.h for how to use them. The previous
+  API should be completely unaffected. Thanks to Christopher Speller.
+
+* Optimized copies during BloscLZ decompression. This can make BloscLZ
+  decompress up to 1.5x faster in some situations.
+
+* LZ4 and LZ4HC compressors updated to version 1.3.1.
+
+* Added an examples directory on how to link apps with Blosc.
+
+* stdlib.h moved from blosc.c to blosc.h as suggested by Rob Lathm.
+
+* Fixed a warning for {snappy,lz4}-free compilation. Thanks to Andrew
+  Schaaf.
+
+* Several improvements for CMakeLists.txt (cmake).
+
+* Fixed C99 compatibility warnings. Thanks to Christopher Speller.
+
+
+Changes from 1.4.0 to 1.4.1
+===========================
+
+* Fixed a bug in blosc_getitem() introduced in 1.4.0. Added a test for
+  blosc_getitem() as well.
+
+
+Changes from 1.3.6 to 1.4.0
+===========================
+
+* Support for non-Intel and non-SSE2 architectures has been added. In
+  particular, the Raspberry Pi platform (ARM) has been tested and all
+  tests pass there.
+
+* Architectures requiring strict access alignment are supported as well.
+  Due to this, architectures with a high penalty for accessing unaligned
+  data (e.g. Raspberry Pi, ARMv6) can compress up to 2.5x faster.
+
+* LZ4 has been updated to r119 (1.2.0) so as to fix a possible security
+  breach.
+
+
+Changes from 1.3.5 to 1.3.6
+===========================
+
+* Updated to LZ4 r118 due to a (highly unlikely) security hole. For
+  details see:
+
+  http://fastcompression.blogspot.fr/2014/06/debunking-lz4-20-years-old-bug-myth.html
+
+
+Changes from 1.3.4 to 1.3.5
+===========================
+
+* Removed a 'pointer from integer without a cast' compiler
+  warning due to a bad macro definition.
+
+
+Changes from 1.3.3 to 1.3.4
+===========================
+
+* Fixed a false buffer overrun condition. This bug made c-blosc
+  fail, even if the failure was not real.
+
+* Fixed the type of a buffer string.
+
+
+Changes from 1.3.2 to 1.3.3
+===========================
+
+* Updated to LZ4 1.1.3 (improved speed for 32-bit platforms).
+
+* Added a new `blosc_cbuffer_complib()` for getting the compression
+  library for a compressed buffer.
+
+
+Changes from 1.3.1 to 1.3.2
+===========================
+
+* Fix for compiling the Snappy sources against MSVC 2008. Thanks to Mark
+  Wiebe!
+
+* Versions for the internal LZ4 and Snappy are now supported. When
+  compiled against the external libraries, this info is not available
+  because they do not support the symbols (yet).
+
+
+Changes from 1.3.0 to 1.3.1
+===========================
+
+* Fixes for a series of issues with the filter for HDF5 and, in
+  particular, a problem in the decompression buffer size that made it
+  impossible to use the blosc_filter in combination with other ones
+  (e.g. fletcher32). See
+  https://github.com/PyTables/PyTables/issues/21.
+
+  Thanks to Antonio Valentino for the fix!
+
+
+Changes from 1.2.4 to 1.3.0
+===========================
+
+A nice handful of compressors have been added to Blosc:
+
+* LZ4 (http://code.google.com/p/lz4/): A very fast
+  compressor/decompressor. Could be thought of as a replacement for the
+  original BloscLZ, but it can behave better in some scenarios.
+
+* LZ4HC (http://code.google.com/p/lz4/): This is a variation of LZ4
+  that achieves much better compression ratios at the cost of being
+  much slower for compressing. Decompression speed is unaffected (and
+  sometimes better than when using LZ4 itself!), so this is very good
+  for read-only datasets.
+
+* Snappy (http://code.google.com/p/snappy/): A very fast
+  compressor/decompressor. Could be thought of as a replacement for the
+  original BloscLZ, but it can behave better in some scenarios.
+
+* Zlib (http://www.zlib.net/): This is a classic. It achieves very
+  good compression ratios, at the cost of speed. However,
+  decompression speed is still pretty good, so it is a good candidate
+  for read-only datasets.
+
+With this, you can select the compression library with the new
+function::
+
+  int blosc_set_complib(char* complib);
+
+where you pass the library that you want to use (currently "blosclz",
+"lz4", "lz4hc", "snappy" and "zlib", but the list can grow in the
+future).
+
+You can get more info about compressor support in your Blosc build by
+using these functions::
+
+  char* blosc_list_compressors(void);
+  int blosc_get_complib_info(char *compressor, char **complib, char **version);
+
+
+Changes from 1.2.2 to 1.2.3
+===========================
+
+- Added a `blosc_init()` and `blosc_destroy()` so that the global lock
+  can be initialized safely. These new functions will also allow other
+  kinds of initializations/destructions in the future.
+
+  Existing applications using Blosc do not need to start using the new
+  functions right away, as long as they call `blosc_set_nthreads()`
+  before anything else. However, using them is highly recommended.
+
+  Thanks to Oscar Villellas for the init/destroy suggestion, it is a
+  nice idea!
+
+
+Changes from 1.2.1 to 1.2.2
+===========================
+
+- All important warnings removed for all tested platforms. This allows
+  for less intrusive compilation experiences with applications
+  including the Blosc source code.
+
+- The `bench/bench.c` has been updated so that it can be compiled on
+  Windows again.
+
+- The new web site has been set to: http://www.blosc.org
+
+
+Changes from 1.2 to 1.2.1
+=========================
+
+- Fixed a problem with the global lock not being initialized.
This
+  mostly affected Windows platforms. Thanks to Christoph
+  Gohlke for finding the cure!
+
+
+Changes from 1.1.5 to 1.2
+=========================
+
+- Now it is possible to call Blosc simultaneously from a parent threaded
+  application without problems. This has been solved by setting a
+  global lock so that the different calling threads do not execute Blosc
+  routines at the same time. Of course, real threading work is still
+  available *inside* Blosc itself. Thanks to Thibault North.
+
+- Support for cmake is now included. Linux, Mac OSX and Windows
+  platforms are supported. Thanks to Thibault North, Antonio Valentino
+  and Mark Wiebe.
+
+- Fixed many compiler warnings (especially about unused variables).
+
+- As a consequence of the above, a minimal change in the API has been
+  introduced. That is, the previous API::
+
+    void blosc_free_resources(void)
+
+  has changed to::
+
+    int blosc_free_resources(void)
+
+  Now, a return value of 0 means that the resources have been released
+  successfully. If the return value is negative, then it is not
+  guaranteed that all the resources have been freed.
+
+- Many typos were fixed and the docs have been improved. The script for
+  generating nice plots for the included benchmarks has been improved
+  too. Thanks to Valentin Haenel.
+
+
+Changes from 1.1.4 to 1.1.5
+===========================
+
+- Fix compile error with msvc compilers (Christoph Gohlke)
+
+
+Changes from 1.1.3 to 1.1.4
+===========================
+
+- Redefinition of the BLOSC_MAX_BUFFERSIZE constant as (INT_MAX -
+  BLOSC_MAX_OVERHEAD) instead of just INT_MAX. This prevents producing
+  outputs larger than INT_MAX, which is not supported.
+
+- The `exit()` call has been replaced by a ``return -1`` in
+  blosc_compress() when checking for buffer sizes. Now programs will not
+  just exit when the buffer is too large, but return a negative code.
+
+- Improvements in explicit casts. Blosc compiles without warnings
+  (with GCC) now.
+
+- Lots of improvements in the docs, in particular a nice ascii-art
+  diagram of the Blosc format (Valentin Haenel).
+
+- Improvements to the plot-speeds.py (Valentin Haenel).
+
+- [HDF5 filter] Adapted the HDF5 filter to use HDF5 1.8 by default
+  (Antonio Valentino).
+
+- [HDF5 filter] New version of the H5Z_class_t definition (Antonio
+  Valentino).
+
+
+Changes from 1.1.2 to 1.1.3
+===========================
+
+- Much improved compression ratio when using large blocks (> 64 KB) and
+  high compression levels (> 6) under some circumstances (special data
+  distribution). Closes #7.
+
+
+Changes from 1.1.1 to 1.1.2
+===========================
+
+- Fixes for small typesizes (#6 and #1 of python-blosc).
+
+
+Changes from 1.1 to 1.1.1
+=========================
+
+- Added code to avoid calling blosc_set_nthreads more than necessary.
+  That will improve performance up to 3x or more, especially for small
+  chunksizes (< 1 MB).
+
+
+Changes from 1.0 to 1.1
+=======================
+
+- Added code for emulating the pthreads API on Windows. No need to link
+  explicitly with a pthreads lib on Windows anymore. However, performance
+  is somewhat worse because the new emulation layer does not support
+  the `pthread_barrier_wait()` call natively. But the big improvement
+  in ease of installation is worth this penalty (especially on
+  64-bit Windows, where pthreads-win32 support is flaky).
+
+- New BLOSC_MAX_BUFFERSIZE, BLOSC_MAX_TYPESIZE and BLOSC_MAX_THREADS
+  symbols are available in blosc.h. These can be useful for validating
+  parameters in clients.
Thanks to Robert Smallshire for suggesting
+  that.
+
+- A new BLOSC_MIN_HEADER_LENGTH symbol in blosc.h tells how many bytes
+  long the minimum length of a Blosc header is. `blosc_cbuffer_sizes()`
+  only needs these bytes to be passed to work correctly.
+
+- Removed many warnings (related to potentially dangerous type-casting
+  code) issued by MSVC 2008 in 64-bit mode.
+
+- Fixed a problem with the computation of the blocksize in the Blosc
+  filter for HDF5.
+
+- Fixed a problem with large datatypes. See
+  http://www.pytables.org/trac/ticket/288 for more info.
+
+- Now Blosc is able to work well even if you fork an existing process
+  with a pool of threads. Bug discovered when PyTables runs in
+  multiprocess environments. See http://pytables.org/trac/ticket/295
+  for details.
+
+- Added a new `blosc_getitem()` call to allow the retrieval of items in
+  sizes smaller than the complete buffer. That is useful for the carray
+  project, but certainly for others too.
+
+
+Changes from 0.9.5 to 1.0
+=========================
+
+- Added a filter for HDF5 so that people can use Blosc outside PyTables,
+  if they want to.
+
+- Many small improvements, especially in the README files.
+
+- Do not assume that size_t is uint_32 for every platform.
+
+- Added more protection for large buffers in memory allocation
+  routines.
+
+- The src/ directory has been renamed to blosc/.
+
+- The `maxbytes` parameter in `blosc_compress()` has been renamed to
+  `destsize`. This is for consistency with the `blosc_decompress()`
+  parameters.
+
+
+Changes from 0.9.4 to 0.9.5
+===========================
+
+- Now, compression level 0 is allowed, meaning no compression at all.
+  The overhead of this mode will always be BLOSC_MAX_OVERHEAD (16)
+  bytes. This mode actually represents using Blosc as a basic memory
+  container.
+
+- Added support for a new parameter `maxbytes` in ``blosc_compress()``.
+  It represents the maximum number of bytes for the output. Unit tests
+  added too.
+
+- Added 3 new functions for querying different metadata on compressed
+  buffers. A test suite for testing the new API has been added too.
+
+
+Changes from 0.9.3 to 0.9.4
+===========================
+
+- Support for cross-platform big/little endian compatibility in Blosc
+  headers has been added.
+
+- Fixed several failures exposed by the extremesuite. The problem was a
+  bad check for limits in the buffer size while compressing.
+
+- Added a new suite in bench.c called ``debugsuite`` that is
+  appropriate for debugging purposes. Now, the ``extremesuite`` can be
+  used for running the complete (and extremely long) suite.
+
+
+Changes from 0.9.0 to 0.9.3
+===========================
+
+- Fixed several nasty bugs uncovered by the new suites in bench.c.
+  Thanks to Tony Theodore and Gabriel Beckers for their (very)
+  responsive beta testing and feedback.
+
+- Added several modes (suites), namely ``suite``, ``hardsuite`` and
+  ``extremehardsuite`` in bench.c so as to allow different levels of
+  testing.
+
+
+Changes from 0.8.0 to 0.9
+=========================
+
+- Internal format version bumped to 2 in order to allow an easy way to
+  indicate that a buffer is being saved uncompressed. This is not
+  supported yet, but it might be in the future.
+
+- Blosc can now use threads for leveraging the increasing number of
+  multi-core processors out there. See README-threaded.txt for more
+  info.
+
+- Added a protection for MacOSX so that it does not have to link against
+  the posix_memalign() function, which seems unavailable in old versions
+  of MacOSX (for example, Tiger).
At any rate, posix_memalign() is not
+  necessary on Mac because 16-byte alignment is ensured by default.
+  Thanks to Ivan Vilata. Fixes #3.
diff --git a/c-blosc/RELEASING.rst b/c-blosc/RELEASING.rst
new file mode 100644
index 0000000..11fe1d7
--- /dev/null
+++ b/c-blosc/RELEASING.rst
@@ -0,0 +1,111 @@
+================
+Releasing Blosc
+================
+
+:Author: Francesc Alted
+:Contact: francesc@blosc.org
+:Date: 2014-01-15
+
+
+Preliminaries
+-------------
+
+- Make sure that ``RELEASE_NOTES.rst`` and ``ANNOUNCE.rst`` are up to
+  date with the latest news in the release.
+
+- Check that the *VERSION* symbols in blosc/blosc.h contain the correct
+  info.
+
+- Commit the changes::
+
+    $ git commit -a -m"Getting ready for X.Y.Z release"
+
+
+Testing
+-------
+
+Create a new build/ directory, change into it and issue::
+
+  $ cmake ..
+  $ cmake --build .
+  $ ctest
+
+To actually test Blosc the hard way, look at the end of:
+
+http://blosc.org/synthetic-benchmarks.html
+
+where instructions on how to intensively test (and benchmark) Blosc
+are given.
+
+Forward compatibility testing
+-----------------------------
+
+First, go to the compat/ directory and generate a file with the current
+version::
+
+  $ cd ../compat
+  $ export LD_LIBRARY_PATH=../build/blosc
+  $ gcc -o filegen filegen.c -L$LD_LIBRARY_PATH -lblosc -I../blosc
+  $ ./filegen compress lz4 blosc-lz4-1.y.z.cdata
+
+In order to make sure that we are not breaking forward compatibility,
+link and run the `compat/filegen` utility against different versions of
+the Blosc library (suggestion: 1.3.0, 1.7.0, 1.11.1, 1.14.1).
+
+You can compile the utility with different blosc shared libraries with::
+
+  $ export LD_LIBRARY_PATH=shared_blosc_library_path
+  $ gcc -o filegen filegen.c -L$LD_LIBRARY_PATH -lblosc -Iblosc.h_include_path
+
+Then, test the file created with the new version with::
+
+  $ ./filegen decompress blosc-lz4-1.y.z.cdata
+
+Repeat this for every codec shipped with Blosc (blosclz, lz4, lz4hc,
+snappy, zlib and zstd).
+
+Tagging
+-------
+
+- Create a tag ``X.Y.Z`` from ``master``. Use the following message::
+
+    $ git tag -a vX.Y.Z -m "Tagging version X.Y.Z"
+
+- Push the previous commits and tag to the github repo::
+
+    $ git push
+    $ git push --tags
+
+
+Announcing
+----------
+
+- Send an announcement to the blosc, pytables-dev, bcolz and
+  comp.compression lists. Use the ``ANNOUNCE.rst`` file as a skeleton
+  (possibly as the definitive version).
+
+
+Post-release actions
+--------------------
+
+- Edit the *VERSION* symbols in blosc/blosc.h in master to increment the
+  version to the next minor one (i.e. X.Y.Z --> X.Y.(Z+1).dev).
+
+- Create new headers for adding new features in ``RELEASE_NOTES.rst``
+  and add this place-holder instead:
+
+  #XXX version-specific blurb XXX#
+
+- Commit the changes::
+
+    $ git commit -a -m"Post X.Y.Z release actions done"
+    $ git push
+
+
+That's all folks!
+
+
+.. Local Variables:
+.. mode: rst
+.. coding: utf-8
+.. fill-column: 70
+.. End:
diff --git a/c-blosc/THANKS.rst b/c-blosc/THANKS.rst
new file mode 100644
index 0000000..548862a
--- /dev/null
+++ b/c-blosc/THANKS.rst
@@ -0,0 +1,35 @@
+I'd like to thank the PyTables community that has collaborated in the
+exhaustive testing of Blosc. With an aggregate amount of more than
+300 TB of different datasets compressed *and* decompressed
+successfully, I can say that Blosc is pretty safe now and ready for
+production purposes.
+
+Other important contributions:
+
+* Valentin Haenel did terrific work implementing the support for the
+  Snappy compression, fixing typos and improving the docs and the
+  plotting script.
+
+* Thibault North, with ideas from Oscar Villellas, contributed a way
+  to call Blosc from different threads in a safe way. Christopher
+  Speller introduced contexts so that a global lock is not necessary
+  anymore.
+
+* The CMake support was initially contributed by Thibault North, and
+  Antonio Valentino and Mark Wiebe made great enhancements to it.
+
+* Christopher Speller also introduced the two new '_ctx' calls to
+  avoid the use of blosc_init() and blosc_destroy().
+
+* Jack Pappas contributed important portability enhancements,
+  especially runtime and cross-platform detection of SSE2/AVX2 as well
+  as high precision timers (HPET) for the benchmark program.
+
+* @littlezhou implemented the AVX2 version of the shuffle routines.
+
+* Julian Taylor contributed a way to detect AVX2 at runtime and
+  call the appropriate routines only if the underlying hardware
+  supports them.
+
+* Kiyo Masui, for relicensing his bitshuffle project and allowing the
+  inclusion of part of his code in Blosc.
diff --git a/c-blosc/THOUGHTS_FOR_2.0.txt b/c-blosc/THOUGHTS_FOR_2.0.txt
new file mode 100644
index 0000000..8303a81
--- /dev/null
+++ b/c-blosc/THOUGHTS_FOR_2.0.txt
@@ -0,0 +1,19 @@
+Here are some random thoughts for Blosc 2.0:
+
+* Try to find a classification for the return codes and standardize
+  them. Use constants in blosc.h for naming them.
+
+* blosc_init() should return a structure that should serve as a
+  context for other calls. So instead of blosc_set_blocksize(size_t
+  size), you would have blosc_set_blocksize(blosc_context* context,
+  size_t size).
+
+  See: https://github.com/Blosc/c-blosc/pull/66/files#r20010877 for
+  some discussion.
+
+* Add more space in the header for allowing more compressors and
+  filters. Discuss how much that space should be on the mailing list.
+
+* Make two different versions of the header so that it supports 32-bit
+  sizes as well as 64-bit? That would allow compressing buffers larger
+  than INT_MAX.
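+
+As a purely hypothetical sketch of what such a context-based API could
+look like (these names are illustrative only and are not part of any
+released Blosc API):
+
+  typedef struct blosc_context blosc_context;   /* opaque handle */
+
+  /* hypothetical names -- not a released API */
+  blosc_context *blosc_create_context(void);
+  void blosc_release_context(blosc_context *context);
+  int blosc_set_blocksize(blosc_context *context, size_t size);
+  int blosc_set_nthreads(blosc_context *context, int nthreads);
+  int blosc_compress(blosc_context *context, int clevel, int doshuffle,
+                     size_t typesize, size_t nbytes,
+                     const void *src, void *dest, size_t destsize);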
diff --git a/c-blosc/appveyor.yml b/c-blosc/appveyor.yml new file mode 100644 index 0000000..ecf8ee4 --- /dev/null +++ b/c-blosc/appveyor.yml @@ -0,0 +1,34 @@ +build: false + + +environment: + PYTHON: "C:\\Python27" + PYTHON_VERSION: "2.7.8" + PYTHON_ARCH: "32" + + CONAN_USERNAME: "francescalted" + CONAN_LOGIN_USERNAME: "francescalted" + CONAN_CHANNEL: "stable" + CONAN_UPLOAD: "https://api.bintray.com/conan/blosc/Conan" + + + matrix: + - MINGW_CONFIGURATIONS: "4.9@x86_64@seh@posix, 5@x86_64@seh@posix, 6@x86_64@seh@posix, 7@x86_64@seh@posix" + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 + CONAN_VISUAL_VERSIONS: 12 + CONAN_RUN_TESTS: "1" + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 + CONAN_VISUAL_VERSIONS: 14 + CONAN_RUN_TESTS: "1" + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + CONAN_VISUAL_VERSIONS: 15 + CONAN_RUN_TESTS: "1" + +install: + - set PATH=%PATH%;%PYTHON%/Scripts/ + - pip.exe install conan --upgrade + - pip.exe install conan_package_tools + +test_script: + - python build.py + diff --git a/c-blosc/appveyor/run_with_env.cmd b/c-blosc/appveyor/run_with_env.cmd new file mode 100644 index 0000000..5da547c --- /dev/null +++ b/c-blosc/appveyor/run_with_env.cmd @@ -0,0 +1,88 @@ +:: To build extensions for 64 bit Python 3, we need to configure environment +:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) +:: +:: To build extensions for 64 bit Python 2, we need to configure environment +:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) +:: +:: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific +:: environment configurations. +:: +:: Note: this script needs to be run with the /E:ON and /V:ON flags for the +:: cmd interpreter, at least for (SDK v7.0) +:: +:: More details at: +:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows +:: http://stackoverflow.com/a/13751649/163740 +:: +:: Author: Olivier Grisel +:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ +:: +:: Notes about batch files for Python people: +:: +:: Quotes in values are literally part of the values: +:: SET FOO="bar" +:: FOO is now five characters long: " b a r " +:: If you don't want quotes, don't include them on the right-hand side. +:: +:: The CALL lines at the end of this file look redundant, but if you move them +:: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y +:: case, I don't know why. +@ECHO OFF + +SET COMMAND_TO_RUN=%* +SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows +SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf + +:: Extract the major and minor versions, and allow for the minor version to be +:: more than 9. This requires the version number to have two dots in it. +SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1% +IF "%PYTHON_VERSION:~3,1%" == "." ( + SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1% +) ELSE ( + SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2% +) + +:: Based on the Python version, determine what SDK version to use, and whether +:: to set the SDK for 64-bit. 
+IF %MAJOR_PYTHON_VERSION% == 2 ( + SET WINDOWS_SDK_VERSION="v7.0" + SET SET_SDK_64=Y +) ELSE ( + IF %MAJOR_PYTHON_VERSION% == 3 ( + SET WINDOWS_SDK_VERSION="v7.1" + IF %MINOR_PYTHON_VERSION% LEQ 4 ( + SET SET_SDK_64=Y + ) ELSE ( + SET SET_SDK_64=N + IF EXIST "%WIN_WDK%" ( + :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ + REN "%WIN_WDK%" 0wdf + ) + ) + ) ELSE ( + ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" + EXIT 1 + ) +) + +IF %PYTHON_ARCH% == 64 ( + IF %SET_SDK_64% == Y ( + ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture + SET DISTUTILS_USE_SDK=1 + SET MSSdk=1 + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 + ) ELSE ( + ECHO Using default MSVC build environment for 64 bit architecture + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 + ) +) ELSE ( + ECHO Using default MSVC build environment for 32 bit architecture + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 +) diff --git a/c-blosc/bench/CMakeLists.txt b/c-blosc/bench/CMakeLists.txt new file mode 100644 index 0000000..1aad1f6 --- /dev/null +++ b/c-blosc/bench/CMakeLists.txt @@ -0,0 +1,123 @@ +# sources +set(SOURCES bench.c) + + +# targets +add_executable(bench ${SOURCES}) +if(UNIX AND NOT APPLE AND NOT HAIKU) + # cmake is complaining about LINK_PRIVATE in original PR + # and removing it does not seem to hurt, so be it. + # target_link_libraries(bench LINK_PRIVATE rt) + target_link_libraries(bench rt) +endif(UNIX AND NOT APPLE AND NOT HAIKU) +target_link_libraries(bench blosc_shared) + +# have to copy blosc dlls on Windows +if(MSVC) + add_custom_command( + TARGET bench + POST_BUILD + COMMAND ${CMAKE_COMMAND} + ARGS -E copy_if_different + "${PROJECT_BINARY_DIR}/blosc/\$\(Configuration\)/blosc.dll" + "${CMAKE_CURRENT_BINARY_DIR}/\$\(Configuration\)/blosc.dll") +elseif(MINGW) + add_custom_command( + TARGET bench + POST_BUILD + COMMAND ${CMAKE_COMMAND} + ARGS -E copy_if_different + "${PROJECT_BINARY_DIR}/blosc/libblosc.dll" + "${CMAKE_CURRENT_BINARY_DIR}/libblosc.dll") +endif() + +# tests +if(BUILD_TESTS) + + # The commented tests below take too much time to complete + option(TEST_INCLUDE_BENCH_SHUFFLE_1 "Include bench shuffle (1 thread) in the tests" ON) + if(TEST_INCLUDE_BENCH_SHUFFLE_1) + set(SHUFFLE_1_OPTS shuffle test 1) + add_test(test_blosclz_shuffle_1 bench blosclz ${SHUFFLE_1_OPTS}) + if (HAVE_LZ4) + add_test(test_lz4_shuffle_1 bench lz4 ${SHUFFLE_1_OPTS}) + # add_test(test_lz4hc_shuffle_1 bench lz4hc ${SHUFFLE_1_OPTS}) + endif (HAVE_LZ4) + if (HAVE_SNAPPY) + add_test(test_snappy_shuffle_1 bench snappy ${SHUFFLE_1_OPTS}) + endif (HAVE_SNAPPY) + if (HAVE_ZLIB) + # add_test(test_zlib_shuffle_1 bench zlib ${SHUFFLE_1_OPTS}) + endif (HAVE_ZLIB) + if (HAVE_ZSTD) + # add_test(test_zstd_shuffle_1 bench zstd ${SHUFFLE_1_OPTS}) + endif (HAVE_ZSTD) + endif(TEST_INCLUDE_BENCH_SHUFFLE_1) + + option(TEST_INCLUDE_BENCH_SHUFFLE_N "Include bench shuffle (multithread) in the tests" ON) + if(TEST_INCLUDE_BENCH_SHUFFLE_N) + set(SHUFFLE_N_OPTS shuffle test) + add_test(test_blosclz_shuffle_n bench blosclz ${SHUFFLE_N_OPTS}) + if (HAVE_LZ4) + add_test(test_lz4_shuffle_n bench lz4 ${SHUFFLE_N_OPTS}) + add_test(test_lz4hc_shuffle_n bench lz4hc ${SHUFFLE_N_OPTS}) + endif (HAVE_LZ4) + if (HAVE_SNAPPY) + 
add_test(test_snappy_shuffle_n bench snappy ${SHUFFLE_N_OPTS})
+  endif (HAVE_SNAPPY)
+  if (HAVE_ZLIB)
+    add_test(test_zlib_shuffle_n bench zlib ${SHUFFLE_N_OPTS})
+  endif (HAVE_ZLIB)
+  if (HAVE_ZSTD)
+    add_test(test_zstd_shuffle_n bench zstd ${SHUFFLE_N_OPTS})
+  endif (HAVE_ZSTD)
+  endif(TEST_INCLUDE_BENCH_SHUFFLE_N)
+
+  option(TEST_INCLUDE_BENCH_BITSHUFFLE_1 "Include bench bitshuffle (1 thread) in the tests" ON)
+  if(TEST_INCLUDE_BENCH_BITSHUFFLE_1)
+    set(BITSHUFFLE_1_OPTS bitshuffle test 1)
+    add_test(test_blosclz_bitshuffle_1 bench blosclz ${BITSHUFFLE_1_OPTS})
+    if (HAVE_LZ4)
+      add_test(test_lz4_bitshuffle_1 bench lz4 ${BITSHUFFLE_1_OPTS})
+      # add_test(test_lz4hc_bitshuffle_1 bench lz4hc ${BITSHUFFLE_1_OPTS})
+    endif (HAVE_LZ4)
+    if (HAVE_SNAPPY)
+      add_test(test_snappy_bitshuffle_1 bench snappy ${BITSHUFFLE_1_OPTS})
+    endif (HAVE_SNAPPY)
+    if (HAVE_ZLIB)
+      # add_test(test_zlib_bitshuffle_1 bench zlib ${BITSHUFFLE_1_OPTS})
+    endif (HAVE_ZLIB)
+    if (HAVE_ZSTD)
+      # add_test(test_zstd_bitshuffle_1 bench zstd ${BITSHUFFLE_1_OPTS})
+    endif (HAVE_ZSTD)
+  endif(TEST_INCLUDE_BENCH_BITSHUFFLE_1)
+
+  option(TEST_INCLUDE_BENCH_BITSHUFFLE_N "Include bench bitshuffle (multithread) in the tests" ON)
+  if(TEST_INCLUDE_BENCH_BITSHUFFLE_N)
+    set(BITSHUFFLE_N_OPTS bitshuffle test)
+    add_test(test_blosclz_bitshuffle_n bench blosclz ${BITSHUFFLE_N_OPTS})
+    if (HAVE_LZ4)
+      add_test(test_lz4_bitshuffle_n bench lz4 ${BITSHUFFLE_N_OPTS})
+      # add_test(test_lz4hc_bitshuffle_n bench lz4hc ${BITSHUFFLE_N_OPTS})
+    endif (HAVE_LZ4)
+    if (HAVE_SNAPPY)
+      add_test(test_snappy_bitshuffle_n bench snappy ${BITSHUFFLE_N_OPTS})
+    endif (HAVE_SNAPPY)
+    if (HAVE_ZLIB)
+      # add_test(test_zlib_bitshuffle_n bench zlib ${BITSHUFFLE_N_OPTS})
+    endif (HAVE_ZLIB)
+    if (HAVE_ZSTD)
+      # add_test(test_zstd_bitshuffle_n bench zstd ${BITSHUFFLE_N_OPTS})
+    endif (HAVE_ZSTD)
+  endif(TEST_INCLUDE_BENCH_BITSHUFFLE_N)
+
+  option(TEST_INCLUDE_BENCH_SUITE "Include bench suite in the tests" OFF)
+  if(TEST_INCLUDE_BENCH_SUITE)
+    add_test(test_hardsuite blosc blosclz shuffle suite)
+  endif(TEST_INCLUDE_BENCH_SUITE)
+
+  option(TEST_INCLUDE_BENCH_DEBUGSUITE "Include bench debugsuite in the tests" OFF)
+  if(TEST_INCLUDE_BENCH_DEBUGSUITE)
+    add_test(test_debugsuite bench blosclz shuffle debugsuite)
+  endif(TEST_INCLUDE_BENCH_DEBUGSUITE)
+endif(BUILD_TESTS)
diff --git a/c-blosc/bench/Makefile b/c-blosc/bench/Makefile
new file mode 100644
index 0000000..bb9ad0d
--- /dev/null
+++ b/c-blosc/bench/Makefile
@@ -0,0 +1,40 @@
+CC = gcc  # clang++, g++ or just gcc if not compiling Snappy (C++ code)
+CFLAGS = -O3 -g -msse2 -Wall
+LDFLAGS = -lpthread  # for UNIX or for Windows with pthread installed
+#LDFLAGS = -static  # for mingw
+SOURCES = $(wildcard ../blosc/*.c)
+EXECUTABLE = bench
+
+# Support for internal LZ4 and LZ4HC
+LZ4_DIR = ../internal-complibs/lz4-1.8.1.2
+CFLAGS += -DHAVE_LZ4 -I$(LZ4_DIR)
+SOURCES += $(wildcard $(LZ4_DIR)/*.c)
+
+# Support for external LZ4 and LZ4HC
+#LDFLAGS += -DHAVE_LZ4 -llz4
+
+# Support for internal Snappy
+#SNAPPY_DIR = ../internal-complibs/snappy-1.1.1
+#CFLAGS += -DHAVE_SNAPPY -I$(SNAPPY_DIR)
+#SOURCES += $(wildcard $(SNAPPY_DIR)/*.cc)
+
+# Support for external Snappy
+LDFLAGS += -DHAVE_SNAPPY -lsnappy
+
+# Support for external Zlib
+LDFLAGS += -DHAVE_ZLIB -lz
+
+# Support for internal Zlib
+#ZLIB_DIR = ../internal-complibs/zlib-1.2.8
+#CFLAGS += -DHAVE_ZLIB -I$(ZLIB_DIR)
+#SOURCES += $(wildcard $(ZLIB_DIR)/*.c)
+
+SOURCES += bench.c
+
+all: $(SOURCES) $(EXECUTABLE)
+
+$(EXECUTABLE): $(SOURCES)
+	$(CC) $(CFLAGS) $(SOURCES) -o $@ $(LDFLAGS)
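The -DHAVE_LZ4 / -DHAVE_SNAPPY / -DHAVE_ZLIB flags in these Makefiles do double duty: they point the compiler at the codec headers and they compile the optional codec paths into blosc.c itself. A minimal sketch of the guard pattern, assuming nothing beyond the macro names the build passes via CFLAGS/LDFLAGS (the function is illustrative only, not part of the patch):

    #include <stdio.h>

    /* Sketch: codecs are compiled in (or out) purely by HAVE_* defines. */
    static void list_compiled_codecs(void) {
      printf("blosclz\n");           /* always built in */
    #if defined(HAVE_LZ4)
      printf("lz4 lz4hc\n");         /* present only with -DHAVE_LZ4 */
    #endif
    #if defined(HAVE_SNAPPY)
      printf("snappy\n");
    #endif
    #if defined(HAVE_ZLIB)
      printf("zlib\n");
    #endif
    }

    int main(void) { list_compiled_codecs(); return 0; }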
+
+clean:
+	rm -rf $(EXECUTABLE)
diff --git a/c-blosc/bench/Makefile.mingw b/c-blosc/bench/Makefile.mingw
new file mode 100644
index 0000000..552aa9d
--- /dev/null
+++ b/c-blosc/bench/Makefile.mingw
@@ -0,0 +1,45 @@
+# Makefile for the MinGW suite for Windows
+CC = g++  # clang++, g++ or just gcc if not compiling Snappy (C++ code)
+CFLAGS = -O3 -g -msse2 -Wall
+#LDFLAGS = -lpthread  # for UNIX or for Windows with pthread installed
+LDFLAGS = -static  # for mingw
+SOURCES = $(wildcard ../blosc/*.c)
+EXECUTABLE = bench
+
+# Support for internal LZ4
+LZ4_DIR = ../internal-complibs/lz4-1.8.1.2
+CFLAGS += -DHAVE_LZ4 -I$(LZ4_DIR)
+SOURCES += $(wildcard $(LZ4_DIR)/*.c)
+
+# Support for external LZ4
+#LDFLAGS += -DHAVE_LZ4 -llz4
+
+# Support for internal Snappy
+SNAPPY_DIR = ../internal-complibs/snappy-1.1.1
+CFLAGS += -DHAVE_SNAPPY -I$(SNAPPY_DIR)
+SOURCES += $(wildcard $(SNAPPY_DIR)/*.cc)
+
+# Support for external Snappy
+#LDFLAGS += -DHAVE_SNAPPY -lsnappy
+
+# Support for the msvc zlib:
+ZLIB_ROOT=/libs/zlib128
+LDFLAGS=-DHAVE_ZLIB -I$(ZLIB_ROOT)/include -lzdll -L$(ZLIB_ROOT)/lib
+
+# Support for the mingw zlib:
+#ZLIB_ROOT=/libs/libz-1.2.8
+#LDFLAGS=-DHAVE_ZLIB -I$(ZLIB_ROOT)/include -lz -L$(ZLIB_ROOT)/lib
+
+# Support for internal Zlib
+#ZLIB_DIR = ../internal-complibs/zlib-1.2.8
+#CFLAGS += -DHAVE_ZLIB -I$(ZLIB_DIR)
+#SOURCES += $(wildcard $(ZLIB_DIR)/*.c)
+
+
+all: $(SOURCES) $(EXECUTABLE)
+
+$(EXECUTABLE): $(SOURCES)
+	$(CC) $(CFLAGS) bench.c $(SOURCES) -o $@ $(LDFLAGS)
+
+clean:
+	rm -rf $(EXECUTABLE)
diff --git a/c-blosc/bench/bench.c b/c-blosc/bench/bench.c
new file mode 100644
index 0000000..39d6aeb
--- /dev/null
+++ b/c-blosc/bench/bench.c
@@ -0,0 +1,584 @@
+/*********************************************************************
+  Small benchmark for testing basic capabilities of Blosc.
+
+  You can select different degrees of 'randomness' in input buffer, as
+  well as external datafiles (uncomment the lines after "For data
+  coming from a file" comment).
+
+  For usage instructions of this benchmark, please see:
+
+    http://blosc.org/synthetic-benchmarks.html
+
+  I'm collecting speeds for different machines, so the output of your
+  benchmarks and your processor specifications are welcome!
+
+  Author: Francesc Alted
+
+  Note: Compiling this with VS2008 does not work well with cmake.  Here
+  is a way to compile the benchmark (with added support for LZ4):
+
+  > cl /DHAVE_LZ4 /arch:SSE2 /Ox /Febench.exe /Iblosc /Iinternal-complibs\lz4-1.8.1.2 bench\bench.c blosc\blosc.c blosc\blosclz.c blosc\shuffle.c blosc\shuffle-sse2.c blosc\shuffle-generic.c blosc\bitshuffle-generic.c blosc\bitshuffle-sse2.c internal-complibs\lz4-1.8.1.2\*.c
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdint.h>
+#include <time.h>
+#if defined(_WIN32)
+  /* For QueryPerformanceCounter(), etc. */
+  #include <windows.h>
+#elif defined(__MACH__) && defined(__APPLE__)
+  #include <mach/clock.h>
+  #include <mach/mach.h>
+  #include <time.h>
+  #include <sys/time.h>
+#elif defined(__unix__) || defined(__HAIKU__)
+  #include <unistd.h>
+  #if defined(__GLIBC__)
+    #include <time.h>
+  #else
+    #include <sys/time.h>
+  #endif
+#else
+  #error Unable to detect platform.
+#endif
+
+
+#include "../blosc/blosc.h"
+
+#define KB  1024
+#define MB  (1024*KB)
+#define GB  (1024*MB)
+
+#define NCHUNKS (32*1024)       /* maximum number of chunks */
+#define MAX_THREADS 16
+
+
+int nchunks = NCHUNKS;
+int niter = 3;                  /* default number of iterations */
+double totalsize = 0.;          /* total compressed/decompressed size */
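One detail of the blosc API worth flagging before the timing machinery: a destination buffer handed to blosc_compress() must be at least the source size plus BLOSC_MAX_OVERHEAD bytes (the constant comes from blosc.h and covers the container header even for incompressible data), which is why every chunk below is allocated with size+BLOSC_MAX_OVERHEAD. A minimal sketch of the sizing rule:

    #include <stdlib.h>
    #include "../blosc/blosc.h"

    /* Sketch: worst-case destination size is payload + blosc header. */
    static void *alloc_dest(size_t size) {
      return malloc(size + BLOSC_MAX_OVERHEAD);
    }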
+
+/* System-specific high-precision timing functions. */
+#if defined(_WIN32)
+
+/* The type of timestamp used on this system. */
+#define blosc_timestamp_t LARGE_INTEGER
+
+/* Set a timestamp value to the current time. */
+void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
+  /* Ignore the return value, assume the call always succeeds. */
+  QueryPerformanceCounter(timestamp);
+}
+
+/* Given two timestamp values, return the difference in microseconds. */
+double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
+  LARGE_INTEGER CounterFreq;
+  QueryPerformanceFrequency(&CounterFreq);
+
+  return (double)(end_time.QuadPart - start_time.QuadPart) /
+    ((double)CounterFreq.QuadPart / 1e6);
+}
+
+#else
+
+/* The type of timestamp used on this system. */
+#define blosc_timestamp_t struct timespec
+
+/* Set a timestamp value to the current time. */
+void blosc_set_timestamp(blosc_timestamp_t* timestamp) {
+#if defined(__MACH__) && defined(__APPLE__)
+  /* OS X does not have clock_gettime, use clock_get_time */
+  clock_serv_t cclock;
+  mach_timespec_t mts;
+  host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
+  clock_get_time(cclock, &mts);
+  mach_port_deallocate(mach_task_self(), cclock);
+  timestamp->tv_sec = mts.tv_sec;
+  timestamp->tv_nsec = mts.tv_nsec;
+#else
+  clock_gettime(CLOCK_MONOTONIC, timestamp);
+#endif
+}
+
+/* Given two timestamp values, return the difference in microseconds. */
+double blosc_elapsed_usecs(blosc_timestamp_t start_time, blosc_timestamp_t end_time) {
+  return (1e6 * (end_time.tv_sec - start_time.tv_sec))
+    + (1e-3 * (end_time.tv_nsec - start_time.tv_nsec));
+}
+
+#endif
+
+/* Given two timestamps, return the difference in seconds */
+double getseconds(blosc_timestamp_t last, blosc_timestamp_t current) {
+  return 1e-6 * blosc_elapsed_usecs(last, current);
+}
+
+/* Given two timestamps, return the time per chunk in usec */
+double get_usec_chunk(blosc_timestamp_t last, blosc_timestamp_t current, int niter, size_t nchunks) {
+  double elapsed_usecs = (double)blosc_elapsed_usecs(last, current);
+  return elapsed_usecs / (double)(niter * nchunks);
+}
+
+/* Define posix_memalign for Windows */
+#if defined(_WIN32)
+#include <malloc.h>
+
+int posix_memalign(void **memptr, size_t alignment, size_t size)
+{
+  *memptr = _aligned_malloc(size, alignment);
+  return 0;
+}
+
+/* Buffers allocated with _aligned_malloc need to be freed with _aligned_free. */
+#define aligned_free(memptr) _aligned_free(memptr)
+#else
+/* If not using MSVC, aligned memory can be freed in the usual way. */
+#define aligned_free(memptr) free(memptr)
+#endif  /* defined(_WIN32) */
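With the helpers above, timing any region of the benchmark is a three-step pattern: stamp, work, stamp, then convert the delta. A small sketch that compiles within this file (the function name is illustrative only):

    /* Sketch: measure one operation with the timestamp helpers above. */
    static double time_memset_usecs(void *buf, size_t n) {
      blosc_timestamp_t t0, t1;
      blosc_set_timestamp(&t0);
      memset(buf, 0, n);               /* work being measured */
      blosc_set_timestamp(&t1);
      /* getseconds(t0, t1) would give seconds; get_usec_chunk() averages
         over niter iterations and nchunks chunks, as used below. */
      return blosc_elapsed_usecs(t0, t1);
    }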
+
+int get_value(int i, int rshift) {
+  int v;
+
+  v = (i<<26)^(i<<18)^(i<<11)^(i<<3)^i;
+  if (rshift < 32) {
+    v &= (1 << rshift) - 1;
+  }
+  return v;
+}
+
+
+void init_buffer(void *src, int size, int rshift) {
+  unsigned int i;
+  int *_src = (int *)src;
+
+  /* To have reproducible results */
+  srand(1);
+
+  /* Initialize the original buffer */
+  for (i = 0; i < size/sizeof(int); ++i) {
+    /* Choose one below */
+    /* _src[i] = 0;
+     * _src[i] = 0x01010101;
+     * _src[i] = 0x01020304;
+     * _src[i] = i * 1/.3;
+     * _src[i] = i;
+     * _src[i] = rand() >> (32-rshift); */
+    _src[i] = get_value(i, rshift);
+  }
+}
+
+
+void do_bench(char *compressor, char *shuffle, int nthreads, int size, int elsize,
+              int rshift, FILE * ofile) {
+  void *src, *srccpy;
+  void *dest[NCHUNKS], *dest2;
+  int nbytes = 0, cbytes = 0;
+  int i, j, retcode;
+  unsigned char *orig, *round;
+  blosc_timestamp_t last, current;
+  double tmemcpy, tshuf, tunshuf;
+  int clevel, doshuffle;
+
+  if (strcmp(shuffle, "shuffle") == 0) {
+    doshuffle = BLOSC_SHUFFLE;
+  }
+  else if (strcmp(shuffle, "bitshuffle") == 0) {
+    doshuffle = BLOSC_BITSHUFFLE;
+  }
+  else if (strcmp(shuffle, "noshuffle") == 0) {
+    doshuffle = BLOSC_NOSHUFFLE;
+  }
+  else abort();
+
+  blosc_set_nthreads(nthreads);
+  if(blosc_set_compressor(compressor) < 0){
+    printf("Compiled w/o support for compressor: '%s', so sorry.\n",
+           compressor);
+    exit(1);
+  }
+
+  /* Initialize buffers */
+  srccpy = malloc(size);
+  retcode = posix_memalign( (void **)(&src), 32, size);
+  if (retcode) abort();
+  retcode = posix_memalign( (void **)(&dest2), 32, size);
+  if (retcode) abort();
+
+  /* zero the src buffer so that every byte is initialized, not only the
+     int-sized (multiple-of-4) positions written by init_buffer() */
+  memset(src, 0, size);
+  init_buffer(src, size, rshift);
+  memcpy(srccpy, src, size);
+  for (j = 0; j < nchunks; j++) {
+    retcode = posix_memalign( (void **)(&dest[j]), 32, size+BLOSC_MAX_OVERHEAD);
+    if (retcode) abort();
+  }
+
+  fprintf(ofile, "--> %d, %d, %d, %d, %s, %s\n", nthreads, size, elsize, rshift, compressor, shuffle);
+  fprintf(ofile, "********************** Run info ******************************\n");
+  fprintf(ofile, "Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
+  fprintf(ofile, "Using synthetic data with %d significant bits (out of 32)\n", rshift);
+  fprintf(ofile, "Dataset size: %d bytes\tType size: %d bytes\n", size, elsize);
+  fprintf(ofile, "Working set: %.1f MB\t\t", (size*nchunks) / (float)MB);
+  fprintf(ofile, "Number of threads: %d\n", nthreads);
+  fprintf(ofile, "********************** Running benchmarks *********************\n");
+
+  blosc_set_timestamp(&last);
+  for (i = 0; i < niter; i++) {
+    for (j = 0; j < nchunks; j++) {
+      memcpy(dest[j], src, size);
+    }
+  }
+  blosc_set_timestamp(&current);
+  tmemcpy = get_usec_chunk(last, current, niter, nchunks);
+  fprintf(ofile, "memcpy(write):\t\t %6.1f us, %.1f MB/s\n",
+          tmemcpy, (size * 1e6) / (tmemcpy*MB));
+
+  blosc_set_timestamp(&last);
+  for (i = 0; i < niter; i++) {
+    for (j = 0; j < nchunks; j++) {
+      memcpy(dest2, dest[j], size);
+    }
+  }
+  blosc_set_timestamp(&current);
+  tmemcpy = get_usec_chunk(last, current, niter, nchunks);
+  fprintf(ofile, "memcpy(read):\t\t %6.1f us, %.1f MB/s\n",
+          tmemcpy, (size * 1e6) / (tmemcpy*MB));
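For readers skimming the loop that follows: stripped of timing and chunking, the core work per compression level is a two-call round trip against the public API. A self-contained sketch (error handling reduced to asserts; the helper name is illustrative):

    #include <assert.h>
    #include <string.h>
    #include "../blosc/blosc.h"

    /* Sketch: one compress/decompress round trip, as exercised below. */
    static void roundtrip_once(const void *src, void *packed, void *back,
                               int clevel, size_t typesize, size_t nbytes) {
      int cbytes, dbytes;

      cbytes = blosc_compress(clevel, BLOSC_SHUFFLE, typesize, nbytes, src,
                              packed, nbytes + BLOSC_MAX_OVERHEAD);
      assert(cbytes > 0);                      /* 0 means incompressible */
      dbytes = blosc_decompress(packed, back, nbytes);
      assert(dbytes == (int)nbytes);
      assert(memcmp(src, back, nbytes) == 0);  /* same check as below */
    }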
+
+  for (clevel=0; clevel<10; clevel++) {
+
+    fprintf(ofile, "Compression level: %d\n", clevel);
+
+    blosc_set_timestamp(&last);
+    for (i = 0; i < niter; i++) {
+      for (j = 0; j < nchunks; j++) {
+        cbytes = blosc_compress(clevel, doshuffle, elsize, size, src,
+                                dest[j], size+BLOSC_MAX_OVERHEAD);
+      }
+    }
+    blosc_set_timestamp(&current);
+    tshuf = get_usec_chunk(last, current, niter, nchunks);
+    fprintf(ofile, "comp(write):\t %6.1f us, %.1f MB/s\t  ",
+            tshuf, (size * 1e6) / (tshuf*MB));
+    fprintf(ofile, "Final bytes: %d  ", cbytes);
+    if (cbytes > 0) {
+      fprintf(ofile, "Ratio: %3.2f", size/(float)cbytes);
+    }
+    fprintf(ofile, "\n");
+
+    /* Compressor was unable to compress.  Copy the buffer manually. */
+    if (cbytes == 0) {
+      for (j = 0; j < nchunks; j++) {
+        memcpy(dest[j], src, size);
+      }
+    }
+
+    blosc_set_timestamp(&last);
+    for (i = 0; i < niter; i++) {
+      for (j = 0; j < nchunks; j++) {
+        if (cbytes == 0) {
+          memcpy(dest2, dest[j], size);
+          nbytes = size;
+        }
+        else {
+          nbytes = blosc_decompress(dest[j], dest2, size);
+        }
+      }
+    }
+    blosc_set_timestamp(&current);
+    tunshuf = get_usec_chunk(last, current, niter, nchunks);
+    fprintf(ofile, "decomp(read):\t %6.1f us, %.1f MB/s\t  ",
+            tunshuf, (nbytes * 1e6) / (tunshuf*MB));
+    if (nbytes < 0) {
+      fprintf(ofile, "FAILED.  Error code: %d\n", nbytes);
+    }
+    /* fprintf(ofile, "Orig bytes: %d\tFinal bytes: %d\n", cbytes, nbytes); */
+
+    /* Check if data has had a good roundtrip.
+       Byte-by-byte comparison is slow, so use 'memcmp' to check whether the
+       roundtripped data is correct. If not, fall back to the slow path to
+       print diagnostic messages. */
+    orig = (unsigned char *)srccpy;
+    round = (unsigned char *)dest2;
+    if (memcmp(orig, round, size) != 0)
+    {
+      for(i = 0; i < size; ++i){
+        if (orig[i] != round[i]) {
+          fprintf(ofile, "\nError: Original data and round-trip do not match in pos %d\n",
+                  (int)i);
+          fprintf(ofile, "Orig--> %x, round-trip--> %x\n",
+                  orig[i], round[i]);
+          break;
+        }
+      }
+    }
+    else { i = size; }
+
+    if (i == size) fprintf(ofile, "OK\n");
+
+  } /* End clevel loop */
+
+
+  /* To compute the totalsize, we should take into account the 10
+     compression levels */
+  totalsize += (size * nchunks * niter * 10.);
+
+  aligned_free(src); free(srccpy); aligned_free(dest2);
+  for (i = 0; i < nchunks; i++) {
+    aligned_free(dest[i]);
+  }
+
+}
+
+
+/* Compute a sensible value for nchunks */
+int get_nchunks(int size_, int ws) {
+  int nchunks;
+
+  nchunks = ws / size_;
+  if (nchunks > NCHUNKS) nchunks = NCHUNKS;
+  if (nchunks < 1) nchunks = 1;
+  return nchunks;
+}
+
+void print_compress_info(void)
+{
+  char *name = NULL, *version = NULL;
+  int ret;
+
+  printf("Blosc version: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
+
+  printf("List of supported compressors in this build: %s\n",
+         blosc_list_compressors());
+
+  printf("Supported compression libraries:\n");
+  ret = blosc_get_complib_info("blosclz", &name, &version);
+  if (ret >= 0) printf("  %s: %s\n", name, version);
+  ret = blosc_get_complib_info("lz4", &name, &version);
+  if (ret >= 0) printf("  %s: %s\n", name, version);
+  ret = blosc_get_complib_info("snappy", &name, &version);
+  if (ret >= 0) printf("  %s: %s\n", name, version);
+  ret = blosc_get_complib_info("zlib", &name, &version);
+  if (ret >= 0) printf("  %s: %s\n", name, version);
+  ret = blosc_get_complib_info("zstd", &name, &version);
+  if (ret >= 0) printf("  %s: %s\n", name, version);
+
+}
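A quick arithmetic check of get_nchunks() against the defaults main() uses below (2 MB buffers, 256 MB working set, which also matches the NCHUNKS = 128 that plot-speeds.py assumes). This needs <assert.h>, which bench.c itself does not pull in, so treat it as a sketch rather than patch code:

    /* Sketch: get_nchunks() under the default settings used by main(). */
    assert(get_nchunks(2*MB, 256*MB) == 128);    /* 256 MB / 2 MB */
    assert(get_nchunks(16, 256*MB) == NCHUNKS);  /* clamped to 32*1024 */
    assert(get_nchunks(512*MB, 256*MB) == 1);    /* never below 1 */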
+
+
+int main(int argc, char *argv[]) {
+  char compressor[32];
+  char shuffle[32] = "shuffle";
+  char bsuite[32];
+  int single = 1;
+  int suite = 0;
+  int hard_suite = 0;
+  int extreme_suite = 0;
+  int debug_suite = 0;
+  int nthreads = 4;            /* The number of threads */
+  int size = 2*MB;             /* Buffer size */
+  int elsize = 8;              /* Datatype size */
+  int rshift = 19;             /* Significant bits */
+  int workingset = 256*MB;     /* The maximum allocated memory */
+  int nthreads_, size_, elsize_, rshift_, i;
+  FILE * output_file = stdout;
+  blosc_timestamp_t last, current;
+  float totaltime;
+  char usage[256];
+
+  print_compress_info();
+
+  strncpy(usage, "Usage: bench [blosclz | lz4 | lz4hc | snappy | zlib | zstd] "
+          "[noshuffle | shuffle | bitshuffle] "
+          "[single | suite | hardsuite | extremesuite | debugsuite] "
+          "[nthreads] [bufsize(bytes)] [typesize] [sbits]", 255);
+
+  if (argc < 2) {
+    printf("%s\n", usage);
+    exit(1);
+  }
+
+  strcpy(compressor, argv[1]);
+
+  if (strcmp(compressor, "blosclz") != 0 &&
+      strcmp(compressor, "lz4") != 0 &&
+      strcmp(compressor, "lz4hc") != 0 &&
+      strcmp(compressor, "snappy") != 0 &&
+      strcmp(compressor, "zlib") != 0 &&
+      strcmp(compressor, "zstd") != 0) {
+    printf("No such compressor: '%s'\n", compressor);
+    printf("%s\n", usage);
+    exit(2);
+  }
+
+  if (argc >= 3) {
+    strcpy(shuffle, argv[2]);
+    if (strcmp(shuffle, "shuffle") != 0 &&
+        strcmp(shuffle, "bitshuffle") != 0 &&
+        strcmp(shuffle, "noshuffle") != 0) {
+      printf("No such shuffler: '%s'\n", shuffle);
+      printf("%s\n", usage);
+      exit(2);
+    }
+  }
+
+  if (argc < 4)
+    strcpy(bsuite, "single");
+  else
+    strcpy(bsuite, argv[3]);
+
+  if (strcmp(bsuite, "single") == 0) {
+    single = 1;
+  }
+  else if (strcmp(bsuite, "test") == 0) {
+    single = 1;
+    workingset = 128*MB;
+  }
+  else if (strcmp(bsuite, "suite") == 0) {
+    suite = 1;
+  }
+  else if (strcmp(bsuite, "hardsuite") == 0) {
+    hard_suite = 1;
+    workingset = 64*MB;
+    /* Values here are ending points for loops */
+    nthreads = 2;
+    size = 8*MB;
+    elsize = 32;
+    rshift = 32;
+  }
+  else if (strcmp(bsuite, "extremesuite") == 0) {
+    extreme_suite = 1;
+    workingset = 32*MB;
+    niter = 1;
+    /* Values here are ending points for loops */
+    nthreads = 4;
+    size = 16*MB;
+    elsize = 32;
+    rshift = 32;
+  }
+  else if (strcmp(bsuite, "debugsuite") == 0) {
+    debug_suite = 1;
+    workingset = 32*MB;
+    niter = 1;
+    /* Warning: values here are starting points for loops.  This is
+       useful for debugging. */
+    nthreads = 1;
+    size = 16*KB;
+    elsize = 1;
+    rshift = 0;
+  }
+  else {
+    printf("%s\n", usage);
+    exit(1);
+  }
+
+  printf("Using compressor: %s\n", compressor);
+  printf("Using shuffle type: %s\n", shuffle);
+  printf("Running suite: %s\n", bsuite);
+
+  if (argc >= 5) {
+    nthreads = atoi(argv[4]);
+  }
+  if (argc >= 6) {
+    size = atoi(argv[5]);
+  }
+  if (argc >= 7) {
+    elsize = atoi(argv[6]);
+  }
+  if (argc >= 8) {
+    rshift = atoi(argv[7]);
+  }
+
+  if ((argc >= 9) || !(single || suite || hard_suite || extreme_suite)) {
+    printf("%s\n", usage);
+    exit(1);
+  }
+
+  nchunks = get_nchunks(size, workingset);
+  blosc_set_timestamp(&last);
+
+  blosc_init();
+
+  if (suite) {
+    for (nthreads_=1; nthreads_ <= nthreads; nthreads_++) {
+      do_bench(compressor, shuffle, nthreads_, size, elsize, rshift, output_file);
+    }
+  }
+  else if (hard_suite) {
+    /* Let's start the rshift loop by 4 so that 19 is visited.  This
+       is to allow a direct comparison with the plain suite, that runs
+       precisely at 19 significant bits. */
+    for (rshift_ = 4; rshift_ <= rshift; rshift_ += 5) {
+      for (elsize_ = 1; elsize_ <= elsize; elsize_ *= 2) {
+        /* The next loop is for getting sizes that are not power of 2 */
+        for (i = -elsize_; i <= elsize_; i += elsize_) {
+          for (size_ = 32*KB; size_ <= size; size_ *= 2) {
+            nchunks = get_nchunks(size_+i, workingset);
+            niter = 1;
+            for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
+              do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
+              blosc_set_timestamp(&current);
+              totaltime = (float)getseconds(last, current);
+              printf("Elapsed time:\t %6.1f s.  Processed data: %.1f GB\n",
+                     totaltime, totalsize / GB);
+            }
+          }
+        }
+      }
+    }
+  }
+  else if (extreme_suite) {
+    for (rshift_ = 0; rshift_ <= rshift; rshift_++) {
+      for (elsize_ = 1; elsize_ <= elsize; elsize_++) {
+        /* The next loop is for getting sizes that are not power of 2 */
+        for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
+          for (size_ = 32*KB; size_ <= size; size_ *= 2) {
+            nchunks = get_nchunks(size_+i, workingset);
+            for (nthreads_ = 1; nthreads_ <= nthreads; nthreads_++) {
+              do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
+              blosc_set_timestamp(&current);
+              totaltime = (float)getseconds(last, current);
+              printf("Elapsed time:\t %6.1f s.  Processed data: %.1f GB\n",
+                     totaltime, totalsize / GB);
+            }
+          }
+        }
+      }
+    }
+  }
+  else if (debug_suite) {
+    for (rshift_ = rshift; rshift_ <= 32; rshift_++) {
+      for (elsize_ = elsize; elsize_ <= 32; elsize_++) {
+        /* The next loop is for getting sizes that are not power of 2 */
+        for (i = -elsize_*2; i <= elsize_*2; i += elsize_) {
+          for (size_ = size; size_ <= 16*MB; size_ *= 2) {
+            nchunks = get_nchunks(size_+i, workingset);
+            for (nthreads_ = nthreads; nthreads_ <= 6; nthreads_++) {
+              do_bench(compressor, shuffle, nthreads_, size_+i, elsize_, rshift_, output_file);
+              blosc_set_timestamp(&current);
+              totaltime = (float)getseconds(last, current);
+              printf("Elapsed time:\t %6.1f s.  Processed data: %.1f GB\n",
+                     totaltime, totalsize / GB);
+            }
+          }
+        }
+      }
+    }
+  }
+  /* Single mode */
+  else {
+    do_bench(compressor, shuffle, nthreads, size, elsize, rshift, output_file);
+  }
+
+  /* Print out some statistics */
+  blosc_set_timestamp(&current);
+  totaltime = (float)getseconds(last, current);
+  printf("\nRound-trip compr/decompr on %.1f GB\n", totalsize / GB);
+  printf("Elapsed time:\t %6.1f s, %.1f MB/s\n",
+         totaltime, totalsize*2*1.1/(MB*totaltime));
+
+  /* Free blosc resources */
+  blosc_free_resources();
+  blosc_destroy();
+  return 0;
+}
diff --git a/c-blosc/bench/plot-speeds.py b/c-blosc/bench/plot-speeds.py
new file mode 100644
index 0000000..f6b7400
--- /dev/null
+++ b/c-blosc/bench/plot-speeds.py
@@ -0,0 +1,223 @@
+"""Script for plotting the results of the 'suite' benchmark.
+Invoke without parameters for usage hints.
+
+:Author: Francesc Alted
+:Date: 2010-06-01
+"""
+from __future__ import print_function
+
+import matplotlib as mpl
+from pylab import *
+
+KB_ = 1024
+MB_ = 1024*KB_
+GB_ = 1024*MB_
+NCHUNKS = 128    # keep in sync with bench.c
+
+linewidth=2
+#markers= ['+', ',', 'o', '.', 's', 'v', 'x', '>', '<', '^']
+#markers= [ 'x', '+', 'o', 's', 'v', '^', '>', '<', ]
+markers= [ 's', 'o', 'v', '^', '+', 'x', '>', '<', '.', ',' ]
+markersize = 8
+
+def get_values(filename):
+    f = open(filename)
+    values = {"memcpyw": [], "memcpyr": []}
+
+    for line in f:
+        if line.startswith('-->'):
+            tmp = line.split('-->')[1]
+            nthreads, size, elsize, sbits, codec, shuffle = [i for i in tmp.split(', ')]
+            nthreads, size, elsize, sbits = map(int, (nthreads, size, elsize, sbits))
+            values["size"] = size * NCHUNKS / MB_
+            values["elsize"] = elsize
+            values["sbits"] = sbits
+            values["codec"] = codec
+            values["shuffle"] = shuffle
+            # New run for nthreads
+            (ratios, speedsw, speedsr) = ([], [], [])
+            # Add a new entry for (ratios, speedw, speedr)
+            values[nthreads] = (ratios, speedsw, speedsr)
+            #print("-->", nthreads, size, elsize, sbits)
+        elif line.startswith('memcpy(write):'):
+            tmp = line.split(',')[1]
+            memcpyw = float(tmp.split(' ')[1])
+            values["memcpyw"].append(memcpyw)
+        elif line.startswith('memcpy(read):'):
+            tmp = line.split(',')[1]
+            memcpyr = float(tmp.split(' ')[1])
+            values["memcpyr"].append(memcpyr)
+        elif line.startswith('comp(write):'):
+            tmp = line.split(',')[1]
+            speedw = float(tmp.split(' ')[1])
+            ratio = float(line.split(':')[-1])
+            speedsw.append(speedw)
+            ratios.append(ratio)
+        elif line.startswith('decomp(read):'):
+            tmp = line.split(',')[1]
+            speedr = float(tmp.split(' ')[1])
+            speedsr.append(speedr)
+            if "OK" not in line:
+                print("WARNING!  OK not found in decomp line!")
+
+    f.close()
+    return nthreads, values
+
+
+def show_plot(plots, yaxis, legends, gtitle, xmax=None, ymax=None):
+    xlabel('Compression ratio')
+    ylabel('Speed (MB/s)')
+    title(gtitle)
+    xlim(0, xmax)
+    ylim(0, ymax)
+    grid(True)
+
+#    legends = [f[f.find('-'):f.index('.out')] for f in filenames]
+#    legends = [l.replace('-', ' ') for l in legends]
+    #legend([p[0] for p in plots], legends, loc = "upper left")
+    legend([p[0] for p in plots
+            if not isinstance(p, mpl.lines.Line2D)],
+           legends, loc = "best")
+
+
+    #subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2)
+    if outfile:
+        print("Saving plot to:", outfile)
+        savefig(outfile, dpi=64)
+    else:
+        show()
+
+if __name__ == '__main__':
+
+    from optparse import OptionParser
+
+    usage = "usage: %prog [-r] [-o outfile] [-t title ] [-d|-c] filename"
+    compress_title = 'Compression speed'
+    decompress_title = 'Decompression speed'
+    yaxis = 'No axis name'
+
+    parser = OptionParser(usage=usage)
+    parser.add_option('-o',
+                      '--outfile',
+                      dest='outfile',
+                      help=('filename for output (many extensions '
+                            'supported, e.g.
.png, .jpg, .pdf)')) + + parser.add_option('-t', + '--title', + dest='title', + help='title of the plot',) + + parser.add_option('-l', + '--limit', + dest='limit', + help='expression to limit number of threads shown',) + + parser.add_option('-x', + '--xmax', + dest='xmax', + help='limit the x-axis', + default=None) + + parser.add_option('-y', + '--ymax', + dest='ymax', + help='limit the y-axis', + default=None) + + parser.add_option('-r', '--report', action='store_true', + dest='report', + help='generate file for reporting ', + default=False) + + parser.add_option('-d', '--decompress', action='store_true', + dest='dspeed', + help='plot decompression data', + default=False) + parser.add_option('-c', '--compress', action='store_true', + dest='cspeed', + help='plot compression data', + default=False) + + (options, args) = parser.parse_args() + if len(args) == 0: + parser.error("No input arguments") + elif len(args) > 1: + parser.error("Too many input arguments") + else: + pass + + if options.report and options.outfile: + parser.error("Can only select one of [-r, -o]") + + if options.dspeed and options.cspeed: + parser.error("Can only select one of [-d, -c]") + elif options.cspeed: + options.dspeed = False + plot_title = compress_title + else: # either neither or dspeed + options.dspeed = True + plot_title = decompress_title + + filename = args[0] + cspeed = options.cspeed + dspeed = options.dspeed + if options.outfile: + outfile = options.outfile + elif options.report: + if cspeed: + outfile = filename[:filename.rindex('.')] + '-compr.png' + else: + outfile = filename[:filename.rindex('.')] + '-decompr.png' + else: + outfile = None + + plots = [] + legends = [] + nthreads, values = get_values(filename) + #print("Values:", values) + + if options.limit: + thread_range = eval(options.limit) + else: + thread_range = range(1, nthreads+1) + + if options.title: + plot_title = options.title + else: + plot_title += " (%(size).1f MB, %(elsize)d bytes, %(sbits)d bits), %(codec)s %(shuffle)s" % values + + gtitle = plot_title + + for nt in thread_range: + #print("Values for %s threads --> %s" % (nt, values[nt])) + (ratios, speedw, speedr) = values[nt] + if cspeed: + speed = speedw + else: + speed = speedr + #plot_ = semilogx(ratios, speed, linewidth=2) + plot_ = plot(ratios, speed, linewidth=2) + plots.append(plot_) + nmarker = nt + if nt >= len(markers): + nmarker = nt%len(markers) + setp(plot_, marker=markers[nmarker], markersize=markersize, + linewidth=linewidth) + legends.append("%d threads" % nt) + + # Add memcpy lines + if cspeed: + mean = np.mean(values["memcpyw"]) + message = "memcpy (write to memory)" + else: + mean = np.mean(values["memcpyr"]) + message = "memcpy (read from memory)" + plot_ = axhline(mean, linewidth=3, linestyle='-.', color='black') + text(4.0, mean+400, message) + plots.append(plot_) + show_plot(plots, yaxis, legends, gtitle, + xmax=int(options.xmax) if options.xmax else None, + ymax=int(options.ymax) if options.ymax else None) + + diff --git a/c-blosc/blosc.pc.in b/c-blosc/blosc.pc.in new file mode 100644 index 0000000..c01273b --- /dev/null +++ b/c-blosc/blosc.pc.in @@ -0,0 +1,14 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +sharedlibdir=${libdir} +includedir=${prefix}/include + +Name: blosc +Description: A blocking, shuffling and lossless compression library +URL: http://blosc.org/ +Version: @BLOSC_VERSION_STRING@ + +Requires: +Libs: -L${libdir} -L${sharedlibdir} -lblosc +Cflags: -I${includedir} diff --git a/c-blosc/blosc/CMakeLists.txt 
b/c-blosc/blosc/CMakeLists.txt new file mode 100644 index 0000000..1affe10 --- /dev/null +++ b/c-blosc/blosc/CMakeLists.txt @@ -0,0 +1,221 @@ +# a simple way to detect that we are using CMAKE +add_definitions(-DUSING_CMAKE) + +set(INTERNAL_LIBS ${PROJECT_SOURCE_DIR}/internal-complibs) + +# Hide symbols by default unless they're specifically exported. +# This makes it easier to keep the set of exported symbols the +# same across all compilers/platforms. +set(CMAKE_C_VISIBILITY_PRESET hidden) + +# includes +set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) +if(NOT DEACTIVATE_LZ4) + if (LZ4_FOUND) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${LZ4_INCLUDE_DIR}) + else(LZ4_FOUND) + set(LZ4_LOCAL_DIR ${INTERNAL_LIBS}/lz4-1.8.1.2) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${LZ4_LOCAL_DIR}) + endif(LZ4_FOUND) +endif(NOT DEACTIVATE_LZ4) + +if(NOT DEACTIVATE_SNAPPY) + if (SNAPPY_FOUND) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${SNAPPY_INCLUDE_DIR}) + else(SNAPPY_FOUND) + set(SNAPPY_LOCAL_DIR ${INTERNAL_LIBS}/snappy-1.1.1) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${SNAPPY_LOCAL_DIR}) + endif(SNAPPY_FOUND) +endif(NOT DEACTIVATE_SNAPPY) + +if(NOT DEACTIVATE_ZLIB) + if (ZLIB_FOUND) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIR}) + else(ZLIB_FOUND) + set(ZLIB_LOCAL_DIR ${INTERNAL_LIBS}/zlib-1.2.8) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZLIB_LOCAL_DIR}) + endif(ZLIB_FOUND) +endif(NOT DEACTIVATE_ZLIB) + +if (NOT DEACTIVATE_ZSTD) + if (ZSTD_FOUND) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZSTD_INCLUDE_DIR}) + else (ZSTD_FOUND) + set(ZSTD_LOCAL_DIR ${INTERNAL_LIBS}/zstd-1.3.4) + set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZSTD_LOCAL_DIR} ${ZSTD_LOCAL_DIR}/common) + endif (ZSTD_FOUND) +endif (NOT DEACTIVATE_ZSTD) + +include_directories(${BLOSC_INCLUDE_DIRS}) + +# library sources +set(SOURCES blosc.c blosclz.c fastcopy.c shuffle-generic.c bitshuffle-generic.c + blosc-common.h blosc-export.h) +if(COMPILER_SUPPORT_SSE2) + message(STATUS "Adding run-time support for SSE2") + set(SOURCES ${SOURCES} shuffle-sse2.c bitshuffle-sse2.c) +endif(COMPILER_SUPPORT_SSE2) +if(COMPILER_SUPPORT_AVX2) + message(STATUS "Adding run-time support for AVX2") + set(SOURCES ${SOURCES} shuffle-avx2.c bitshuffle-avx2.c) +endif(COMPILER_SUPPORT_AVX2) +set(SOURCES ${SOURCES} shuffle.c) + +# library install directory +set(lib_dir lib${LIB_SUFFIX}) +set(version_string ${BLOSC_VERSION_MAJOR}.${BLOSC_VERSION_MINOR}.${BLOSC_VERSION_PATCH}) + +set(CMAKE_THREAD_PREFER_PTHREAD TRUE) +if(WIN32) + # try to use the system library + find_package(Threads) + if(NOT Threads_FOUND) + message(STATUS "using the internal pthread library for win32 systems.") + set(SOURCES ${SOURCES} win32/pthread.c) + else(NOT Threads_FOUND) + set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) + endif(NOT Threads_FOUND) +else(WIN32) + find_package(Threads REQUIRED) + set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) +endif(WIN32) + +if(NOT DEACTIVATE_LZ4) + if(LZ4_FOUND) + set(LIBS ${LIBS} ${LZ4_LIBRARY}) + else(LZ4_FOUND) + file(GLOB LZ4_FILES ${LZ4_LOCAL_DIR}/*.c) + set(SOURCES ${SOURCES} ${LZ4_FILES}) + endif(LZ4_FOUND) +endif(NOT DEACTIVATE_LZ4) + +if(NOT DEACTIVATE_SNAPPY) + if(SNAPPY_FOUND) + set(LIBS ${LIBS} ${SNAPPY_LIBRARY}) + else(SNAPPY_FOUND) + file(GLOB SNAPPY_FILES ${SNAPPY_LOCAL_DIR}/*.cc) + set(SOURCES ${SOURCES} ${SNAPPY_FILES}) + endif(SNAPPY_FOUND) +endif(NOT DEACTIVATE_SNAPPY) + +if(NOT DEACTIVATE_ZLIB) + if(ZLIB_FOUND) + set(LIBS ${LIBS} ${ZLIB_LIBRARY}) + else(ZLIB_FOUND) + file(GLOB 
ZLIB_FILES ${ZLIB_LOCAL_DIR}/*.c) + set(SOURCES ${SOURCES} ${ZLIB_FILES}) + endif(ZLIB_FOUND) +endif(NOT DEACTIVATE_ZLIB) + +if (NOT DEACTIVATE_ZSTD) + if (ZSTD_FOUND) + set(LIBS ${LIBS} ${ZSTD_LIBRARY}) + else (ZSTD_FOUND) + file(GLOB ZSTD_FILES + ${ZSTD_LOCAL_DIR}/common/*.c + ${ZSTD_LOCAL_DIR}/compress/*.c + ${ZSTD_LOCAL_DIR}/decompress/*.c) + set(SOURCES ${SOURCES} ${ZSTD_FILES}) + endif (ZSTD_FOUND) +endif (NOT DEACTIVATE_ZSTD) + + +# targets +if (BUILD_SHARED) + add_library(blosc_shared SHARED ${SOURCES}) + set_target_properties(blosc_shared PROPERTIES OUTPUT_NAME blosc) + set_target_properties(blosc_shared PROPERTIES + VERSION ${version_string} + SOVERSION 1 # Change this when an ABI change happens + ) + set_property( + TARGET blosc_shared + APPEND PROPERTY COMPILE_DEFINITIONS BLOSC_SHARED_LIBRARY) +endif() + +# Based on the target architecture and hardware features supported +# by the C compiler, set hardware architecture optimization flags +# for specific shuffle implementations. +if(COMPILER_SUPPORT_SSE2) + if (MSVC) + # MSVC targets SSE2 by default on 64-bit configurations, but not 32-bit configurations. + if (${CMAKE_SIZEOF_VOID_P} EQUAL 4) + set_source_files_properties(shuffle-sse2.c bitshuffle-sse2.c PROPERTIES COMPILE_FLAGS "/arch:SSE2") + endif (${CMAKE_SIZEOF_VOID_P} EQUAL 4) + else (MSVC) + set_source_files_properties(shuffle-sse2.c bitshuffle-sse2.c PROPERTIES COMPILE_FLAGS -msse2) + endif (MSVC) + + # Define a symbol for the shuffle-dispatch implementation + # so it knows SSE2 is supported even though that file is + # compiled without SSE2 support (for portability). + set_property( + SOURCE shuffle.c + APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_SSE2_ENABLED) +endif(COMPILER_SUPPORT_SSE2) +if(COMPILER_SUPPORT_AVX2) + if (MSVC) + set_source_files_properties(shuffle-avx2.c bitshuffle-avx2.c PROPERTIES COMPILE_FLAGS "/arch:AVX2") + else (MSVC) + set_source_files_properties(shuffle-avx2.c bitshuffle-avx2.c PROPERTIES COMPILE_FLAGS -mavx2) + endif (MSVC) + + # Define a symbol for the shuffle-dispatch implementation + # so it knows AVX2 is supported even though that file is + # compiled without AVX2 support (for portability). + set_property( + SOURCE shuffle.c + APPEND PROPERTY COMPILE_DEFINITIONS SHUFFLE_AVX2_ENABLED) +endif(COMPILER_SUPPORT_AVX2) + +# When the option has been selected to compile the test suite, +# compile an additional version of blosc_shared which exports +# some normally-hidden symbols (to facilitate unit testing). +if (BUILD_TESTS) + add_library(blosc_shared_testing SHARED ${SOURCES}) + set_target_properties(blosc_shared_testing PROPERTIES OUTPUT_NAME blosc_testing) + set_property( + TARGET blosc_shared_testing + APPEND PROPERTY COMPILE_DEFINITIONS BLOSC_SHARED_LIBRARY) + set_property( + TARGET blosc_shared_testing + APPEND PROPERTY COMPILE_DEFINITIONS BLOSC_TESTING) + # TEMP : CMake doesn't automatically add -lpthread here like it does + # for the blosc_shared target. Force it for now. 
+  if(UNIX)
+    set_property(
+        TARGET blosc_shared_testing
+        APPEND PROPERTY LINK_FLAGS "-lpthread")
+  endif()
+endif()
+
+if (BUILD_SHARED)
+  target_link_libraries(blosc_shared ${LIBS})
+  target_include_directories(blosc_shared PUBLIC ${BLOSC_INCLUDE_DIRS})
+endif()
+
+if (BUILD_TESTS)
+  target_link_libraries(blosc_shared_testing ${LIBS})
+  target_include_directories(blosc_shared_testing PUBLIC ${BLOSC_INCLUDE_DIRS})
+endif()
+
+if(BUILD_STATIC)
+  add_library(blosc_static STATIC ${SOURCES})
+  set_target_properties(blosc_static PROPERTIES OUTPUT_NAME blosc)
+  if (MSVC)
+    set_target_properties(blosc_static PROPERTIES PREFIX lib)
+  endif()
+  target_link_libraries(blosc_static ${LIBS})
+  target_include_directories(blosc_static PUBLIC ${BLOSC_INCLUDE_DIRS})
+endif(BUILD_STATIC)
+
+# install
+if(BLOSC_INSTALL)
+  install(FILES blosc.h blosc-export.h DESTINATION include COMPONENT DEV)
+  if(BUILD_SHARED)
+    install(TARGETS blosc_shared DESTINATION ${lib_dir} COMPONENT LIB)
+  endif(BUILD_SHARED)
+  if(BUILD_STATIC)
+    install(TARGETS blosc_static DESTINATION ${lib_dir} COMPONENT DEV)
+  endif(BUILD_STATIC)
+endif(BLOSC_INSTALL)
diff --git a/c-blosc/blosc/bitshuffle-avx2.c b/c-blosc/blosc/bitshuffle-avx2.c
new file mode 100644
index 0000000..91db8f7
--- /dev/null
+++ b/c-blosc/blosc/bitshuffle-avx2.c
@@ -0,0 +1,248 @@
+/*
+ * Bitshuffle - Filter for improving compression of typed binary data.
+ *
+ * Author: Kiyoshi Masui
+ * Website: http://www.github.com/kiyo-masui/bitshuffle
+ * Created: 2014
+ *
+ * Note: Adapted for c-blosc by Francesc Alted.
+ *
+ * See LICENSES/BITSHUFFLE.txt file for details about copyright and
+ * rights to use.
+ *
+ */
+
+#include "bitshuffle-generic.h"
+#include "bitshuffle-sse2.h"
+#include "bitshuffle-avx2.h"
+
+
+/* Make sure AVX2 is available for the compilation target and compiler. */
+#if !defined(__AVX2__)
+  #error AVX2 is not supported by the target architecture/platform and/or this compiler.
+#endif
+
+#include <immintrin.h>
+
+/* The next is useful for debugging purposes */
+#if 0
+#include <stdio.h>
+#include <string.h>
+
+static void printymm(__m256i ymm0)
+{
+  uint8_t buf[32];
+
+  ((__m256i *)buf)[0] = ymm0;
+  printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n",
+          buf[0], buf[1], buf[2], buf[3],
+          buf[4], buf[5], buf[6], buf[7],
+          buf[8], buf[9], buf[10], buf[11],
+          buf[12], buf[13], buf[14], buf[15],
+          buf[16], buf[17], buf[18], buf[19],
+          buf[20], buf[21], buf[22], buf[23],
+          buf[24], buf[25], buf[26], buf[27],
+          buf[28], buf[29], buf[30], buf[31]);
+}
+#endif
+
+
+/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */
+
+
+/* Transpose bits within bytes. */
+int64_t bshuf_trans_bit_byte_avx2(void* in, void* out, const size_t size,
+                                  const size_t elem_size) {
+
+    char* in_b = (char*) in;
+    char* out_b = (char*) out;
+    int32_t* out_i32;
+
+    size_t nbyte = elem_size * size;
+
+    int64_t count;
+
+    __m256i ymm;
+    int32_t bt;
+    size_t ii, kk;
+
+    for (ii = 0; ii + 31 < nbyte; ii += 32) {
+        ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]);
+        for (kk = 0; kk < 8; kk++) {
+            bt = _mm256_movemask_epi8(ymm);
+            ymm = _mm256_slli_epi16(ymm, 1);
+            out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8];
+            *out_i32 = bt;
+        }
+    }
+    count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
+                                           nbyte - nbyte % 32);
+    return count;
+}
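The loop above leans entirely on the movemask trick: _mm256_movemask_epi8 gathers the top bit of each of 32 bytes into one 32-bit mask, and shifting every byte left by one exposes the next bit plane. A scalar model of the same step for one 8-byte group (illustrative, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: the sign bit of byte k becomes bit k of the mask, which
       is exactly what movemask does per lane; one call per bit plane. */
    static uint8_t movemask8(const uint8_t bytes[8]) {
      uint8_t mask = 0;
      int k;
      for (k = 0; k < 8; k++)
        mask |= (uint8_t)((bytes[k] >> 7) << k);
      return mask;
    }

    int main(void) {
      uint8_t v[8] = {0x80, 0, 0x80, 0, 0, 0, 0, 0x80};
      printf("%02x\n", movemask8(v));  /* prints 85: bits 0, 2, 7 set */
      return 0;
    }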
+
+
+/* Transpose bits within elements. */
+int64_t bshuf_trans_bit_elem_avx2(void* in, void* out, const size_t size,
+                                  const size_t elem_size, void* tmp_buf) {
+
+    int64_t count;
+
+    CHECK_MULT_EIGHT(size);
+
+    count = bshuf_trans_byte_elem_sse2(in, out, size, elem_size, tmp_buf);
+    CHECK_ERR(count);
+    count = bshuf_trans_bit_byte_avx2(out, tmp_buf, size, elem_size);
+    CHECK_ERR(count);
+    count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
+
+    return count;
+}
+
+
+/* For data organized into a row for each bit (8 * elem_size rows), transpose
+ * the bytes. */
+int64_t bshuf_trans_byte_bitrow_avx2(void* in, void* out, const size_t size,
+                                     const size_t elem_size) {
+
+    char* in_b = (char*) in;
+    char* out_b = (char*) out;
+
+    size_t nrows = 8 * elem_size;
+    size_t nbyte_row = size / 8;
+    size_t ii, jj, kk, hh, mm;
+
+    CHECK_MULT_EIGHT(size);
+
+    if (elem_size % 4)
+        return bshuf_trans_byte_bitrow_sse2(in, out, size, elem_size);
+
+    __m256i ymm_0[8];
+    __m256i ymm_1[8];
+    __m256i ymm_storeage[8][4];
+
+    for (jj = 0; jj + 31 < nbyte_row; jj += 32) {
+        for (ii = 0; ii + 3 < elem_size; ii += 4) {
+            for (hh = 0; hh < 4; hh ++) {
+
+                for (kk = 0; kk < 8; kk ++){
+                    ymm_0[kk] = _mm256_loadu_si256((__m256i *) &in_b[
+                            (ii * 8 + hh * 8 + kk) * nbyte_row + jj]);
+                }
+
+                for (kk = 0; kk < 4; kk ++){
+                    ymm_1[kk] = _mm256_unpacklo_epi8(ymm_0[kk * 2],
+                            ymm_0[kk * 2 + 1]);
+                    ymm_1[kk + 4] = _mm256_unpackhi_epi8(ymm_0[kk * 2],
+                            ymm_0[kk * 2 + 1]);
+                }
+
+                for (kk = 0; kk < 2; kk ++){
+                    for (mm = 0; mm < 2; mm ++){
+                        ymm_0[kk * 4 + mm] = _mm256_unpacklo_epi16(
+                                ymm_1[kk * 4 + mm * 2],
+                                ymm_1[kk * 4 + mm * 2 + 1]);
+                        ymm_0[kk * 4 + mm + 2] = _mm256_unpackhi_epi16(
+                                ymm_1[kk * 4 + mm * 2],
+                                ymm_1[kk * 4 + mm * 2 + 1]);
+                    }
+                }
+
+                for (kk = 0; kk < 4; kk ++){
+                    ymm_1[kk * 2] = _mm256_unpacklo_epi32(ymm_0[kk * 2],
+                            ymm_0[kk * 2 + 1]);
+                    ymm_1[kk * 2 + 1] = _mm256_unpackhi_epi32(ymm_0[kk * 2],
+                            ymm_0[kk * 2 + 1]);
+                }
+
+                for (kk = 0; kk < 8; kk ++){
+                    ymm_storeage[kk][hh] = ymm_1[kk];
+                }
+            }
+
+            for (mm = 0; mm < 8; mm ++) {
+
+                for (kk = 0; kk < 4; kk ++){
+                    ymm_0[kk] = ymm_storeage[mm][kk];
+                }
+
+                ymm_1[0] = _mm256_unpacklo_epi64(ymm_0[0], ymm_0[1]);
+                ymm_1[1] = _mm256_unpacklo_epi64(ymm_0[2], ymm_0[3]);
+                ymm_1[2] = _mm256_unpackhi_epi64(ymm_0[0], ymm_0[1]);
+                ymm_1[3] = _mm256_unpackhi_epi64(ymm_0[2], ymm_0[3]);
+
+                ymm_0[0] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 32);
+                ymm_0[1] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 32);
+                ymm_0[2] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 49);
+                ymm_0[3] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 49);
+
+                _mm256_storeu_si256((__m256i *) &out_b[
+                        (jj + mm * 2 + 0 * 16) * nrows + ii * 8], ymm_0[0]);
+                _mm256_storeu_si256((__m256i *) &out_b[
+                        (jj + mm * 2 + 0 * 16 + 1) * nrows + ii * 8], ymm_0[1]);
+                _mm256_storeu_si256((__m256i *) &out_b[
+                        (jj + mm * 2 + 1 * 16) * nrows + ii * 8], ymm_0[2]);
+                _mm256_storeu_si256((__m256i *) &out_b[
+                        (jj + mm * 2 + 1 * 16 + 1) * nrows + ii * 8], ymm_0[3]);
+            }
+        }
+    }
+    for (ii = 0; ii < nrows; ii ++ ) {
+        for (jj = nbyte_row - nbyte_row % 32; jj < nbyte_row; jj ++) {
+            out_b[jj * nrows + ii] = in_b[ii * nbyte_row + jj];
+        }
+    }
+    return size * elem_size;
+}
+
+
+/* Shuffle bits within the bytes of eight element blocks. */
+int64_t bshuf_shuffle_bit_eightelem_avx2(void* in, void* out, const size_t size,
+                                         const size_t elem_size) {
+
+    CHECK_MULT_EIGHT(size);
+
+    /* With a bit of care, this could be written such that it is */
+    /* in_buf = out_buf safe.
*/ + char* in_b = (char*) in; + char* out_b = (char*) out; + + size_t nbyte = elem_size * size; + size_t ii, jj, kk, ind; + + __m256i ymm; + int32_t bt; + + if (elem_size % 4) { + return bshuf_shuffle_bit_eightelem_sse2(in, out, size, elem_size); + } else { + for (jj = 0; jj + 31 < 8 * elem_size; jj += 32) { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + ymm = _mm256_loadu_si256((__m256i *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = _mm256_movemask_epi8(ymm); + ymm = _mm256_slli_epi16(ymm, 1); + ind = (ii + jj / 8 + (7 - kk) * elem_size); + * (int32_t *) &out_b[ind] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. */ +int64_t bshuf_untrans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = bshuf_trans_byte_bitrow_avx2(in, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = bshuf_shuffle_bit_eightelem_avx2(tmp_buf, out, size, elem_size); + + return count; +} diff --git a/c-blosc/blosc/bitshuffle-avx2.h b/c-blosc/blosc/bitshuffle-avx2.h new file mode 100644 index 0000000..b7a7fcf --- /dev/null +++ b/c-blosc/blosc/bitshuffle-avx2.h @@ -0,0 +1,38 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +/* AVX2-accelerated shuffle/unshuffle routines. */ + +#ifndef BITSHUFFLE_AVX2_H +#define BITSHUFFLE_AVX2_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + AVX2-accelerated bitshuffle routine. +*/ +BLOSC_NO_EXPORT int64_t +bshuf_trans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf); + +/** + AVX2-accelerated bitunshuffle routine. +*/ +BLOSC_NO_EXPORT int64_t +bshuf_untrans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf); + +#ifdef __cplusplus +} +#endif + +#endif /* BITSHUFFLE_AVX2_H */ diff --git a/c-blosc/blosc/bitshuffle-generic.c b/c-blosc/blosc/bitshuffle-generic.c new file mode 100644 index 0000000..c7b3b47 --- /dev/null +++ b/c-blosc/blosc/bitshuffle-generic.c @@ -0,0 +1,221 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "bitshuffle-generic.h" + + +/* Transpose bytes within elements, starting partway through input. */ +int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size, + const size_t elem_size, const size_t start) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + size_t ii, jj, kk; + + CHECK_MULT_EIGHT(start); + + if (size > start) { + /* ii loop separated into 2 loops so the compiler can unroll */ + /* the inner one. 
*/ + for (ii = start; ii + 7 < size; ii += 8) { + for (jj = 0; jj < elem_size; jj++) { + for (kk = 0; kk < 8; kk++) { + out_b[jj * size + ii + kk] + = in_b[ii * elem_size + kk * elem_size + jj]; + } + } + } + for (ii = size - size % 8; ii < size; ii ++) { + for (jj = 0; jj < elem_size; jj++) { + out_b[jj * size + ii] = in_b[ii * elem_size + jj]; + } + } + } + return size * elem_size; +} + + +/* Transpose bytes within elements. */ +int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0); +} + + +/* Transpose bits within bytes. */ +int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size, + const size_t elem_size, const size_t start_byte) { + + const uint64_t* in_b = (const uint64_t*) in; + uint8_t* out_b = (uint8_t*) out; + + uint64_t x, t; + + size_t ii, kk; + size_t nbyte = elem_size * size; + size_t nbyte_bitrow = nbyte / 8; + + uint64_t e=1; + const int little_endian = *(uint8_t *) &e == 1; + const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow; + const int64_t bit_row_offset = little_endian ? 0 : 7 * nbyte_bitrow; + + CHECK_MULT_EIGHT(nbyte); + CHECK_MULT_EIGHT(start_byte); + + for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) { + x = in_b[ii]; + if (little_endian) { + TRANS_BIT_8X8(x, t); + } else { + TRANS_BIT_8X8_BE(x, t); + } + for (kk = 0; kk < 8; kk ++) { + out_b[bit_row_offset + kk * bit_row_skip + ii] = x; + x = x >> 8; + } + } + return size * elem_size; +} + + +/* Transpose bits within bytes. */ +int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0); +} + + +/* General transpose of an array, optimized for large element sizes. */ +int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda, + const size_t ldb, const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + size_t ii, jj; + for (ii = 0; ii < lda; ii++) { + for (jj = 0; jj < ldb; jj++) { + memcpy(&out_b[(jj*lda + ii) * elem_size], + &in_b[(ii*ldb + jj) * elem_size], elem_size); + } + } + return lda * ldb * elem_size; +} + + +/* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */ +int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t nbyte_bitrow = size / 8; + + CHECK_MULT_EIGHT(size); + + return bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow); +} + + +/* Transpose bits within elements. */ +int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + CHECK_ERR(count); + count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. 
*/
+int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size,
+                                     const size_t elem_size) {
+    char* in_b = (char*) in;
+    char* out_b = (char*) out;
+
+    size_t nbyte_row = size / 8;
+    size_t ii, jj, kk;
+
+    CHECK_MULT_EIGHT(size);
+
+    for (jj = 0; jj < elem_size; jj++) {
+        for (ii = 0; ii < nbyte_row; ii++) {
+            for (kk = 0; kk < 8; kk++) {
+                out_b[ii * 8 * elem_size + jj * 8 + kk] = \
+                        in_b[(jj * 8 + kk) * nbyte_row + ii];
+            }
+        }
+    }
+    return size * elem_size;
+}
+
+
+/* Shuffle bits within the bytes of eight element blocks. */
+int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, \
+        const size_t size, const size_t elem_size) {
+
+    const char *in_b;
+    char *out_b;
+    uint64_t x, t;
+    size_t ii, jj, kk;
+    size_t nbyte, out_index;
+
+    uint64_t e=1;
+    const int little_endian = *(uint8_t *) &e == 1;
+    const size_t elem_skip = little_endian ? elem_size : -elem_size;
+    const uint64_t elem_offset = little_endian ? 0 : 7 * elem_size;
+
+    CHECK_MULT_EIGHT(size);
+
+    in_b = (const char*) in;
+    out_b = (char*) out;
+
+    nbyte = elem_size * size;
+
+    for (jj = 0; jj < 8 * elem_size; jj += 8) {
+        for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) {
+            x = *((uint64_t*) &in_b[ii + jj]);
+            if (little_endian) {
+                TRANS_BIT_8X8(x, t);
+            } else {
+                TRANS_BIT_8X8_BE(x, t);
+            }
+            for (kk = 0; kk < 8; kk++) {
+                out_index = ii + jj / 8 + elem_offset + kk * elem_skip;
+                *((uint8_t*) &out_b[out_index]) = x;
+                x = x >> 8;
+            }
+        }
+    }
+    return size * elem_size;
+}
+
+
+/* Untranspose bits within elements. */
+int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size,
+                                    const size_t elem_size, void* tmp_buf) {
+
+    int64_t count;
+
+    CHECK_MULT_EIGHT(size);
+
+    count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size);
+    CHECK_ERR(count);
+    count = bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size);
+
+    return count;
+}
diff --git a/c-blosc/blosc/bitshuffle-generic.h b/c-blosc/blosc/bitshuffle-generic.h
new file mode 100644
index 0000000..25be277
--- /dev/null
+++ b/c-blosc/blosc/bitshuffle-generic.h
@@ -0,0 +1,161 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+
+/* Generic (non-hardware-accelerated) shuffle/unshuffle routines.
+   These are used when hardware-accelerated functions aren't available
+   for a particular platform; they are also used by the hardware-
+   accelerated functions to handle any remaining elements in a block
+   which isn't a multiple of the hardware's vector size. */
+
+#ifndef BITSHUFFLE_GENERIC_H
+#define BITSHUFFLE_GENERIC_H
+
+#include "blosc-common.h"
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Macros. */
+#define CHECK_MULT_EIGHT(n) if (n % 8) return -80;
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
+#define CHECK_ERR(count) if (count < 0) { return count; }
+
+
+/* ---- Worker code not requiring special instruction sets. ----
+ *
+ * The following code does not use any x86 specific vectorized instructions
+ * and should compile on any machine
+ *
+ */
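The scalar kernels above detect byte order at run time with the store-a-one idiom (write the integer 1, look at its first byte) rather than a compile-time macro. A standalone illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      uint64_t e = 1;
      /* Little-endian machines store the low-order byte first, so the
         first byte of e reads back as 1; big-endian machines give 0. */
      int little_endian = *(uint8_t *)&e == 1;
      printf("little endian: %d\n", little_endian);
      return 0;
    }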
+
+/* Transpose 8x8 bit array packed into a single quadword *x*.
+ * *t* is workspace. */
+#define TRANS_BIT_8X8(x, t) {                                               \
+        t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AALL;                          \
+        x = x ^ t ^ (t << 7);                                               \
+        t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCLL;                         \
+        x = x ^ t ^ (t << 14);                                              \
+        t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0LL;                         \
+        x = x ^ t ^ (t << 28);                                              \
+    }
+
+/* Transpose 8x8 bit array along the diagonal from upper right
+   to lower left */
+#define TRANS_BIT_8X8_BE(x, t) {                                            \
+        t = (x ^ (x >> 9)) & 0x0055005500550055LL;                          \
+        x = x ^ t ^ (t << 9);                                               \
+        t = (x ^ (x >> 18)) & 0x0000333300003333LL;                         \
+        x = x ^ t ^ (t << 18);                                              \
+        t = (x ^ (x >> 36)) & 0x000000000F0F0F0FLL;                         \
+        x = x ^ t ^ (t << 36);                                              \
+    }
+
+/* Transpose of an array of arbitrarily typed elements. */
+#define TRANS_ELEM_TYPE(in, out, lda, ldb, type_t) {                        \
+        type_t* in_type = (type_t*) in;                                     \
+        type_t* out_type = (type_t*) out;                                   \
+        size_t ii, jj, kk;                                                  \
+        for (ii = 0; ii + 7 < lda; ii += 8) {                               \
+            for (jj = 0; jj < ldb; jj++) {                                  \
+                for (kk = 0; kk < 8; kk++) {                                \
+                    out_type[jj*lda + ii + kk] =                            \
+                            in_type[ii*ldb + kk * ldb + jj];                \
+                }                                                           \
+            }                                                               \
+        }                                                                   \
+        for (ii = lda - lda % 8; ii < lda; ii ++) {                         \
+            for (jj = 0; jj < ldb; jj++) {                                  \
+                out_type[jj*lda + ii] = in_type[ii*ldb + jj];               \
+            }                                                               \
+        }                                                                   \
+    }
+
+
+/* Private functions */
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size,
+                                const size_t elem_size, const size_t start);
+
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size,
+                           const size_t elem_size);
+
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size,
+                               const size_t elem_size, const size_t start_byte);
+
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_elem(const void* in, void* out, const size_t lda,
+                 const size_t ldb, const size_t elem_size);
+
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size,
+                         const size_t elem_size);
+
+BLOSC_NO_EXPORT int64_t
+bshuf_shuffle_bit_eightelem_scal(const void* in, void* out,
+                                 const size_t size, const size_t elem_size);
+
+
+/* Bitshuffle the data.
+ *
+ * Transpose the bits within elements.
+ *
+ * Parameters
+ * ----------
+ *  in : input buffer, must be of size * elem_size bytes
+ *  out : output buffer, must be of size * elem_size bytes
+ *  size : number of elements in input
+ *  elem_size : element size of typed data
+ *  tmp_buffer : temporary buffer with the same size as `in` and `out`
+ *
+ * Returns
+ * -------
+ *  number of bytes processed, or a negative error code
+ *
+ */
+
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size,
+                          const size_t elem_size, void* tmp_buf);
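Taken together with the untranspose prototype that follows, this forms a lossless pair. A usage sketch for code living inside the library tree (these helpers carry BLOSC_NO_EXPORT, so they are not visible outside it; sizes chosen to satisfy CHECK_MULT_EIGHT):

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>
    #include "bitshuffle-generic.h"

    int main(void) {
      const size_t n = 64, elem = 4;    /* n must be a multiple of 8 */
      int32_t src[64], back[64];
      void *shuffled = malloc(n * elem), *tmp = malloc(n * elem);
      size_t i;

      for (i = 0; i < n; i++) src[i] = (int32_t)(i * 1000);

      assert(bshuf_trans_bit_elem_scal(src, shuffled, n, elem, tmp)
             == (int64_t)(n * elem));
      assert(bshuf_untrans_bit_elem_scal(shuffled, back, n, elem, tmp)
             == (int64_t)(n * elem));
      assert(memcmp(src, back, n * elem) == 0);  /* exact round trip */

      free(shuffled); free(tmp);
      return 0;
    }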
+
+/* Unshuffle bitshuffled data.
+ *
+ * Untranspose the bits within elements.
+ *
+ * To properly unshuffle bitshuffled data, *size* and *elem_size* must
+ * match the parameters used to shuffle the data.
+ *
+ * Parameters
+ * ----------
+ *  in : input buffer, must be of size * elem_size bytes
+ *  out : output buffer, must be of size * elem_size bytes
+ *  size : number of elements in input
+ *  elem_size : element size of typed data
+ *  tmp_buffer : temporary buffer with the same size as `in` and `out`
+ *
+ * Returns
+ * -------
+ *  number of bytes processed, or a negative error code
+ *
+ */
+
+BLOSC_NO_EXPORT int64_t
+bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size,
+                            const size_t elem_size, void* tmp_buf);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BITSHUFFLE_GENERIC_H */
diff --git a/c-blosc/blosc/bitshuffle-sse2.c b/c-blosc/blosc/bitshuffle-sse2.c
new file mode 100644
index 0000000..8191ca2
--- /dev/null
+++ b/c-blosc/blosc/bitshuffle-sse2.c
@@ -0,0 +1,467 @@
+/*
+ * Bitshuffle - Filter for improving compression of typed binary data.
+ *
+ * Author: Kiyoshi Masui
+ * Website: http://www.github.com/kiyo-masui/bitshuffle
+ * Created: 2014
+ *
+ * Note: Adapted for c-blosc by Francesc Alted.
+ *
+ * See LICENSES/BITSHUFFLE.txt file for details about copyright and
+ * rights to use.
+ *
+ */
+
+#include "bitshuffle-generic.h"
+#include "bitshuffle-sse2.h"
+
+/* Make sure SSE2 is available for the compilation target and compiler. */
+#if !defined(__SSE2__)
+  #error SSE2 is not supported by the target architecture/platform and/or this compiler.
+#endif
+
+#include <emmintrin.h>
+
+/* The next is useful for debugging purposes */
+#if 0
+#include <stdio.h>
+#include <string.h>
+
+
+static void printxmm(__m128i xmm0)
+{
+  uint8_t buf[32];
+
+  ((__m128i *)buf)[0] = xmm0;
+  printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n",
+          buf[0], buf[1], buf[2], buf[3],
+          buf[4], buf[5], buf[6], buf[7],
+          buf[8], buf[9], buf[10], buf[11],
+          buf[12], buf[13], buf[14], buf[15]);
+}
+#endif
+
+
+/* ---- Worker code that requires SSE2. Intel Pentium 4 (2000) and later. ---- */
+
+/* Transpose bytes within elements for 16 bit elements. */
+int64_t bshuf_trans_byte_elem_SSE_16(void* in, void* out, const size_t size) {
+
+    char* in_b = (char*) in;
+    char* out_b = (char*) out;
+    __m128i a0, b0, a1, b1;
+    size_t ii;
+
+    for (ii=0; ii + 15 < size; ii += 16) {
+        a0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 0*16]);
+        b0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 1*16]);
+
+        a1 = _mm_unpacklo_epi8(a0, b0);
+        b1 = _mm_unpackhi_epi8(a0, b0);
+
+        a0 = _mm_unpacklo_epi8(a1, b1);
+        b0 = _mm_unpackhi_epi8(a1, b1);
+
+        a1 = _mm_unpacklo_epi8(a0, b0);
+        b1 = _mm_unpackhi_epi8(a0, b0);
+
+        a0 = _mm_unpacklo_epi8(a1, b1);
+        b0 = _mm_unpackhi_epi8(a1, b1);
+
+        _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0);
+        _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0);
+    }
+    return bshuf_trans_byte_elem_remainder(in, out, size, 2,
+                                           size - size % 16);
+}
+
+
+/* Transpose bytes within elements for 32 bit elements.
*/ +int64_t bshuf_trans_byte_elem_SSE_32(void* in, void* out, const size_t size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + __m128i a0, b0, c0, d0, a1, b1, c1, d1; + size_t ii; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 3*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi64(a1, c1); + b0 = _mm_unpackhi_epi64(a1, c1); + c0 = _mm_unpacklo_epi64(b1, d1); + d0 = _mm_unpackhi_epi64(b1, d1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 4, + size - size % 16); +} + + +/* Transpose bytes within elements for 64 bit elements. */ +int64_t bshuf_trans_byte_elem_SSE_64(void* in, void* out, const size_t size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + size_t ii; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 3*16]); + e0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 4*16]); + f0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 5*16]); + g0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 6*16]); + h0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 7*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + e1 = _mm_unpacklo_epi8(e0, f0); + f1 = _mm_unpackhi_epi8(e0, f0); + g1 = _mm_unpacklo_epi8(g0, h0); + h1 = _mm_unpackhi_epi8(g0, h0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + e0 = _mm_unpacklo_epi8(e1, f1); + f0 = _mm_unpackhi_epi8(e1, f1); + g0 = _mm_unpacklo_epi8(g1, h1); + h0 = _mm_unpackhi_epi8(g1, h1); + + a1 = _mm_unpacklo_epi32(a0, c0); + b1 = _mm_unpackhi_epi32(a0, c0); + c1 = _mm_unpacklo_epi32(b0, d0); + d1 = _mm_unpackhi_epi32(b0, d0); + e1 = _mm_unpacklo_epi32(e0, g0); + f1 = _mm_unpackhi_epi32(e0, g0); + g1 = _mm_unpacklo_epi32(f0, h0); + h1 = _mm_unpackhi_epi32(f0, h0); + + a0 = _mm_unpacklo_epi64(a1, e1); + b0 = _mm_unpackhi_epi64(a1, e1); + c0 = _mm_unpacklo_epi64(b1, f1); + d0 = _mm_unpackhi_epi64(b1, f1); + e0 = _mm_unpacklo_epi64(c1, g1); + f0 = _mm_unpackhi_epi64(c1, g1); + g0 = _mm_unpacklo_epi64(d1, h1); + h0 = _mm_unpackhi_epi64(d1, h1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + _mm_storeu_si128((__m128i *) &out_b[4*size + ii], e0); + _mm_storeu_si128((__m128i 
*) &out_b[5*size + ii], f0); + _mm_storeu_si128((__m128i *) &out_b[6*size + ii], g0); + _mm_storeu_si128((__m128i *) &out_b[7*size + ii], h0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 8, + size - size % 16); +} + + +/* Memory copy with bshuf call signature. */ +int64_t bshuf_copy(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + + memcpy(out_b, in_b, size * elem_size); + return size * elem_size; +} + + +/* Transpose bytes within elements using best SSE algorithm available. */ +int64_t bshuf_trans_byte_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + /* Trivial cases: power of 2 bytes. */ + switch (elem_size) { + case 1: + count = bshuf_copy(in, out, size, elem_size); + return count; + case 2: + count = bshuf_trans_byte_elem_SSE_16(in, out, size); + return count; + case 4: + count = bshuf_trans_byte_elem_SSE_32(in, out, size); + return count; + case 8: + count = bshuf_trans_byte_elem_SSE_64(in, out, size); + return count; + } + + /* Worst case: odd number of bytes. Turns out that this is faster for */ + /* (odd * 2) byte elements as well (hence % 4). */ + if (elem_size % 4) { + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + return count; + } + + /* Multiple of power of 2: transpose hierarchically. */ + { + size_t nchunk_elem; + + if ((elem_size % 8) == 0) { + nchunk_elem = elem_size / 8; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); + count = bshuf_trans_byte_elem_SSE_64(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); + } else if ((elem_size % 4) == 0) { + nchunk_elem = elem_size / 4; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); + count = bshuf_trans_byte_elem_SSE_32(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); + } else { + /* Not used since scalar algorithm is faster. */ + nchunk_elem = elem_size / 2; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); + count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); + } + + return count; + } +} + + +/* Transpose bits within bytes. */ +int64_t bshuf_trans_bit_byte_sse2(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + uint16_t* out_ui16; + int64_t count; + size_t nbyte = elem_size * size; + __m128i xmm; + int32_t bt; + size_t ii, kk; + + CHECK_MULT_EIGHT(nbyte); + + for (ii = 0; ii + 15 < nbyte; ii += 16) { + xmm = _mm_loadu_si128((__m128i *) &in_b[ii]); + for (kk = 0; kk < 8; kk++) { + bt = _mm_movemask_epi8(xmm); + xmm = _mm_slli_epi16(xmm, 1); + out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_ui16 = bt; + } + } + count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 16); + return count; +} + + +/* Transpose bits within elements. */ +int64_t bshuf_trans_bit_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = bshuf_trans_byte_elem_sse2(in, out, size, elem_size, tmp_buf); + CHECK_ERR(count); + count = bshuf_trans_bit_byte_sse2(out, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. 
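bshuf_trans_bit_elem_sse2() above chains three passes: a byte transpose within elements, the _mm_movemask_epi8 loop of bshuf_trans_bit_byte_sse2(), and a reordering of the resulting bit rows in bshuf_trans_bitrow_eight(). The movemask pass is the dense one; a scalar restatement of its effect, as an illustrative sketch rather than the library's code, valid when nbyte is a multiple of 8:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* Bit r of input byte p lands in output row r (of 8), at bit (p % 8)
       of byte p / 8 -- the same layout the movemask loop produces. */
    static void trans_bit_byte_ref(const uint8_t *in, uint8_t *out,
                                   size_t nbyte) {
        size_t p;
        int r;
        memset(out, 0, nbyte);
        for (p = 0; p < nbyte; p++)
            for (r = 0; r < 8; r++)
                if ((in[p] >> r) & 1)
                    out[r * (nbyte / 8) + p / 8] |= (uint8_t)(1u << (p % 8));
    }

    int main(void) {
        uint8_t in[16], out[16];
        size_t i;
        for (i = 0; i < 16; i++)
            in[i] = (uint8_t)(1u << (i % 8));   /* one set bit per byte */
        trans_bit_byte_ref(in, out, sizeof(in));
        for (i = 0; i < 16; i++)
            printf("%02x ", out[i]);            /* 01 01 02 02 ... 80 80 */
        putchar('\n');
        return 0;
    }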
*/ +int64_t bshuf_trans_byte_bitrow_sse2(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + size_t ii, jj; + + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + __m128 *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; + + CHECK_MULT_EIGHT(size); + + for (ii = 0; ii + 7 < nrows; ii += 8) { + for (jj = 0; jj + 15 < nbyte_row; jj += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 0)*nbyte_row + jj]); + b0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 1)*nbyte_row + jj]); + c0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 2)*nbyte_row + jj]); + d0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 3)*nbyte_row + jj]); + e0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 4)*nbyte_row + jj]); + f0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 5)*nbyte_row + jj]); + g0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 6)*nbyte_row + jj]); + h0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 7)*nbyte_row + jj]); + + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpacklo_epi8(c0, d0); + c1 = _mm_unpacklo_epi8(e0, f0); + d1 = _mm_unpacklo_epi8(g0, h0); + e1 = _mm_unpackhi_epi8(a0, b0); + f1 = _mm_unpackhi_epi8(c0, d0); + g1 = _mm_unpackhi_epi8(e0, f0); + h1 = _mm_unpackhi_epi8(g0, h0); + + + a0 = _mm_unpacklo_epi16(a1, b1); + b0 = _mm_unpacklo_epi16(c1, d1); + c0 = _mm_unpackhi_epi16(a1, b1); + d0 = _mm_unpackhi_epi16(c1, d1); + + e0 = _mm_unpacklo_epi16(e1, f1); + f0 = _mm_unpacklo_epi16(g1, h1); + g0 = _mm_unpackhi_epi16(e1, f1); + h0 = _mm_unpackhi_epi16(g1, h1); + + + a1 = _mm_unpacklo_epi32(a0, b0); + b1 = _mm_unpackhi_epi32(a0, b0); + + c1 = _mm_unpacklo_epi32(c0, d0); + d1 = _mm_unpackhi_epi32(c0, d0); + + e1 = _mm_unpacklo_epi32(e0, f0); + f1 = _mm_unpackhi_epi32(e0, f0); + + g1 = _mm_unpacklo_epi32(g0, h0); + h1 = _mm_unpackhi_epi32(g0, h0); + + /* We don't have a storeh instruction for integers, so interpret */ + /* as a float. Have a storel (_mm_storel_epi64). 
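The float reinterpretation noted above deserves a standalone illustration: SSE2 offers _mm_storel_epi64 for the low 64 bits of an integer register, but no integer store for the high half, so the kernel stores the high half through the float intrinsic _mm_storeh_pi. A minimal sketch of the same trick, using _mm_castsi128_ps for the reinterpretation where the kernel above uses pointer casts:

    #include <stdio.h>
    #include <stdint.h>
    #include <emmintrin.h>

    int main(void) {
        __m128i v = _mm_set_epi64x(0x0102030405060708LL,   /* high half */
                                   0x090a0b0c0d0e0f10LL);  /* low half */
        uint64_t lo, hi;
        _mm_storel_epi64((__m128i *)&lo, v);               /* integer store */
        _mm_storeh_pi((__m64 *)&hi, _mm_castsi128_ps(v));  /* float store */
        printf("lo=%016llx hi=%016llx\n",
               (unsigned long long)lo, (unsigned long long)hi);
        return 0;
    }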
 */
+            as = (__m128 *) &a1;
+            bs = (__m128 *) &b1;
+            cs = (__m128 *) &c1;
+            ds = (__m128 *) &d1;
+            es = (__m128 *) &e1;
+            fs = (__m128 *) &f1;
+            gs = (__m128 *) &g1;
+            hs = (__m128 *) &h1;
+
+            _mm_storel_pi((__m64 *) &out_b[(jj + 0) * nrows + ii], *as);
+            _mm_storel_pi((__m64 *) &out_b[(jj + 2) * nrows + ii], *bs);
+            _mm_storel_pi((__m64 *) &out_b[(jj + 4) * nrows + ii], *cs);
+            _mm_storel_pi((__m64 *) &out_b[(jj + 6) * nrows + ii], *ds);
+            _mm_storel_pi((__m64 *) &out_b[(jj + 8) * nrows + ii], *es);
+            _mm_storel_pi((__m64 *) &out_b[(jj + 10) * nrows + ii], *fs);
+            _mm_storel_pi((__m64 *) &out_b[(jj + 12) * nrows + ii], *gs);
+            _mm_storel_pi((__m64 *) &out_b[(jj + 14) * nrows + ii], *hs);
+
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 1) * nrows + ii], *as);
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 3) * nrows + ii], *bs);
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 5) * nrows + ii], *cs);
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 7) * nrows + ii], *ds);
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 9) * nrows + ii], *es);
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 11) * nrows + ii], *fs);
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 13) * nrows + ii], *gs);
+            _mm_storeh_pi((__m64 *) &out_b[(jj + 15) * nrows + ii], *hs);
+        }
+        for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj++) {
+            out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj];
+            out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj];
+            out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj];
+            out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj];
+            out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj];
+            out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj];
+            out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj];
+            out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj];
+        }
+    }
+    return size * elem_size;
+}
+
+
+/* Shuffle bits within the bytes of eight element blocks. */
+int64_t bshuf_shuffle_bit_eightelem_sse2(void* in, void* out, const size_t size,
+                                         const size_t elem_size) {
+    /* With a bit of care, this could be written such that it is */
+    /* in_buf = out_buf safe. */
+    char* in_b = (char*) in;
+    uint16_t* out_ui16 = (uint16_t*) out;
+
+    size_t nbyte = elem_size * size;
+
+    __m128i xmm;
+    int32_t bt;
+    size_t ii, jj, kk;
+    size_t ind;
+
+    CHECK_MULT_EIGHT(size);
+
+    if (elem_size % 2) {
+        bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size);
+    } else {
+        for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
+             ii += 8 * elem_size) {
+            for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
+                xmm = _mm_loadu_si128((__m128i *) &in_b[ii + jj]);
+                for (kk = 0; kk < 8; kk++) {
+                    bt = _mm_movemask_epi8(xmm);
+                    xmm = _mm_slli_epi16(xmm, 1);
+                    ind = (ii + jj / 8 + (7 - kk) * elem_size);
+                    out_ui16[ind / 2] = bt;
+                }
+            }
+        }
+    }
+    return size * elem_size;
+}
+
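Before the unshuffle direction below, it is worth pinning down the invariant the SSE2 pair must satisfy: unshuffling with the same size and elem_size restores the input exactly, as the header documentation for the scalar versions states. A pure-C reference model of the bit-plane layout, useful for checking that property; this is an illustrative sketch independent of the SIMD code above:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    static int getbit(const uint8_t *p, size_t i) { return (p[i / 8] >> (i % 8)) & 1; }
    static void setbit(uint8_t *p, size_t i, int v) { if (v) p[i / 8] |= (uint8_t)(1u << (i % 8)); }

    /* Bit-plane transpose: bit b of element n moves to position b*size + n
       of the output bit stream. */
    static void bitshuffle_ref(const uint8_t *in, uint8_t *out,
                               size_t size, size_t elem_size) {
        size_t n, b, nbits = 8 * elem_size;
        memset(out, 0, size * elem_size);
        for (n = 0; n < size; n++)
            for (b = 0; b < nbits; b++)
                setbit(out, b * size + n, getbit(in, n * nbits + b));
    }

    static void bitunshuffle_ref(const uint8_t *in, uint8_t *out,
                                 size_t size, size_t elem_size) {
        size_t n, b, nbits = 8 * elem_size;
        memset(out, 0, size * elem_size);
        for (n = 0; n < size; n++)
            for (b = 0; b < nbits; b++)
                setbit(out, n * nbits + b, getbit(in, b * size + n));
    }

    int main(void) {
        uint8_t src[16], shuf[16], back[16];
        size_t i;
        for (i = 0; i < sizeof(src); i++) src[i] = (uint8_t)(i * 37 + 1);
        bitshuffle_ref(src, shuf, 8, 2);     /* 8 elements of 2 bytes */
        bitunshuffle_ref(shuf, back, 8, 2);
        assert(memcmp(src, back, sizeof(src)) == 0);
        puts("roundtrip ok");
        return 0;
    }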
+
+/* Untranspose bits within elements. */
+int64_t bshuf_untrans_bit_elem_sse2(void* in, void* out, const size_t size,
+                                    const size_t elem_size, void* tmp_buf) {
+
+    int64_t count;
+
+    CHECK_MULT_EIGHT(size);
+
+    count = bshuf_trans_byte_bitrow_sse2(in, tmp_buf, size, elem_size);
+    CHECK_ERR(count);
+    count = bshuf_shuffle_bit_eightelem_sse2(tmp_buf, out, size, elem_size);
+
+    return count;
+}
diff --git a/c-blosc/blosc/bitshuffle-sse2.h b/c-blosc/blosc/bitshuffle-sse2.h
new file mode 100644
index 0000000..05ae500
--- /dev/null
+++ b/c-blosc/blosc/bitshuffle-sse2.h
@@ -0,0 +1,52 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+
+/* SSE2-accelerated shuffle/unshuffle routines. */
+
+#ifndef BITSHUFFLE_SSE2_H
+#define BITSHUFFLE_SSE2_H
+
+#include "blosc-common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_byte_elem_sse2(void* in, void* out, const size_t size,
+                           const size_t elem_size, void* tmp_buf);
+
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_byte_bitrow_sse2(void* in, void* out, const size_t size,
+                             const size_t elem_size);
+
+BLOSC_NO_EXPORT int64_t
+bshuf_shuffle_bit_eightelem_sse2(void* in, void* out, const size_t size,
+                                 const size_t elem_size);
+
+/**
+  SSE2-accelerated bitshuffle routine.
+*/
+BLOSC_NO_EXPORT int64_t
+bshuf_trans_bit_elem_sse2(void* in, void* out, const size_t size,
+                          const size_t elem_size, void* tmp_buf);
+
+/**
+  SSE2-accelerated bitunshuffle routine.
+*/
+BLOSC_NO_EXPORT int64_t
+bshuf_untrans_bit_elem_sse2(void* in, void* out, const size_t size,
+                            const size_t elem_size, void* tmp_buf);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* BITSHUFFLE_SSE2_H */
diff --git a/c-blosc/blosc/blosc-common.h b/c-blosc/blosc/blosc-common.h
new file mode 100644
index 0000000..0b2c2fc
--- /dev/null
+++ b/c-blosc/blosc/blosc-common.h
@@ -0,0 +1,80 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+
+#ifndef SHUFFLE_COMMON_H
+#define SHUFFLE_COMMON_H
+
+#include "blosc-export.h"
+#include <stddef.h>
+
+/* Import standard integer type definitions */
+#if defined(_WIN32) && !defined(__MINGW32__)
+
+  /* stdint.h only available in VS2010 (VC++ 16.0) and newer */
+  #if defined(_MSC_VER) && _MSC_VER < 1600
+    #include "win32/stdint-windows.h"
+  #else
+    #include <stdint.h>
+  #endif
+
+  /* Use inlined functions for supported systems */
+  #if defined(_MSC_VER) && !defined(__cplusplus)   /* Visual Studio */
+    #define inline __inline  /* Visual C is not C99, but supports some kind of inline */
+  #endif
+
+#else
+  #include <stdint.h>
+#endif  /* _WIN32 */
+
+
+/* Define the __SSE2__ symbol if compiling with Visual C++ and
+   targeting the minimum architecture level supporting SSE2.
+   Other compilers define this as expected and emit warnings
+   when it is re-defined. */
+#if !defined(__SSE2__) && defined(_MSC_VER) && \
+    (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))
+  #define __SSE2__
+#endif
+
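The effect of the block above can be checked with a one-file experiment: under MSVC the macro is supplied by this header logic when targeting x64, or 32-bit x86 built with /arch:SSE2, while GCC and Clang predefine __SSE2__ themselves under -msse2. A small sketch assuming only a C compiler:

    #include <stdio.h>

    #if !defined(__SSE2__) && defined(_MSC_VER) && \
        (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))
      #define __SSE2__
    #endif

    int main(void) {
    #if defined(__SSE2__)
        puts("SSE2 code paths compiled in");
    #else
        puts("no SSE2 at compile time");
    #endif
        return 0;
    }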
+/*
+ * Detect if the architecture is fine with unaligned access.
+ */
+#if !defined(BLOSC_STRICT_ALIGN)
+#define BLOSC_STRICT_ALIGN
+#if defined(__i386__) || defined(__386) || defined (__amd64)  /* GNU C, Sun Studio */
+#undef BLOSC_STRICT_ALIGN
+#elif defined(__i486__) || defined(__i586__) || defined(__i686__)  /* GNU C */
+#undef BLOSC_STRICT_ALIGN
+#elif defined(_M_IX86) || defined(_M_X64)   /* Intel, MSVC */
+#undef BLOSC_STRICT_ALIGN
+#elif defined(__386)
+#undef BLOSC_STRICT_ALIGN
+#elif defined(_X86_) /* MinGW */
+#undef BLOSC_STRICT_ALIGN
+#elif defined(__I86__) /* Digital Mars */
+#undef BLOSC_STRICT_ALIGN
+/* Seems like unaligned access on ARM (at least ARMv6) is pretty
+   expensive, so we are going to always enforce strict alignment on ARM.
+   If anybody suggests that newer ARMs are better, we can revisit this. */
+/* #elif defined(__ARM_FEATURE_UNALIGNED) */  /* ARM, GNU C */
+/* #undef BLOSC_STRICT_ALIGN */
+#elif defined(_ARCH_PPC) || defined(__PPC__)
+/* Modern PowerPC systems (like POWER8) should support unaligned access
+   quite efficiently. */
+#undef BLOSC_STRICT_ALIGN
+#endif
+#endif
+
+#if defined(__SSE2__)
+  #include <emmintrin.h>
+#endif
+#if defined(__AVX2__)
+  #include <immintrin.h>
+#endif
+
+#endif  /* SHUFFLE_COMMON_H */
diff --git a/c-blosc/blosc/blosc-export.h b/c-blosc/blosc/blosc-export.h
new file mode 100644
index 0000000..49df929
--- /dev/null
+++ b/c-blosc/blosc/blosc-export.h
@@ -0,0 +1,45 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+#ifndef BLOSC_EXPORT_H
+#define BLOSC_EXPORT_H
+
+/* Macros for specifying exported symbols.
+   BLOSC_EXPORT is used to decorate symbols that should be
+   exported by the blosc shared library.
+   BLOSC_NO_EXPORT is used to decorate symbols that should NOT
+   be exported by the blosc shared library.
+*/
+#if defined(BLOSC_SHARED_LIBRARY)
+  #if defined(_MSC_VER)
+    #define BLOSC_EXPORT __declspec(dllexport)
+  #elif (defined(__GNUC__) && __GNUC__ >= 4) || defined(__clang__)
+    #if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
+      #define BLOSC_EXPORT __attribute__((dllexport))
+    #else
+      #define BLOSC_EXPORT __attribute__((visibility("default")))
+    #endif  /* defined(_WIN32) || defined(__CYGWIN__) */
+  #else
+    #error Cannot determine how to define BLOSC_EXPORT for this compiler.
+  #endif
+#else
+  #define BLOSC_EXPORT
+#endif  /* defined(BLOSC_SHARED_LIBRARY) */
+
+#if defined(__GNUC__) || defined(__clang__)
+  #define BLOSC_NO_EXPORT __attribute__((visibility("hidden")))
+#else
+  #define BLOSC_NO_EXPORT
+#endif  /* defined(__GNUC__) || defined(__clang__) */
+
+/* When testing, export everything to make it easier to implement tests. */
+#if defined(BLOSC_TESTING)
+  #undef BLOSC_NO_EXPORT
+  #define BLOSC_NO_EXPORT BLOSC_EXPORT
+#endif  /* defined(BLOSC_TESTING) */
+
+#endif  /* BLOSC_EXPORT_H */
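How the two macros are meant to be consumed, as a sketch; my_public_entry and my_internal_helper are hypothetical names, and the build system is assumed to define BLOSC_SHARED_LIBRARY when it produces the shared library:

    #include "blosc-export.h"

    /* Part of the public ABI: exported from the .so/.dll. */
    BLOSC_EXPORT int my_public_entry(void);        /* hypothetical */

    /* Internal helper: linkable inside the library but hidden outside
       (and therefore free to change), unless BLOSC_TESTING re-exports
       it for the test suite. */
    BLOSC_NO_EXPORT int my_internal_helper(void);  /* hypothetical */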
diff --git a/c-blosc/blosc/blosc.c b/c-blosc/blosc/blosc.c
new file mode 100644
index 0000000..f526f22
--- /dev/null
+++ b/c-blosc/blosc/blosc.c
@@ -0,0 +1,2186 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted
+  Creation date: 2009-05-20
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <assert.h>
+
+#include "fastcopy.h"
+
+#if defined(USING_CMAKE)
+  #include "config.h"
+#endif /* USING_CMAKE */
+#include "blosc.h"
+#include "shuffle.h"
+#include "blosclz.h"
+#if defined(HAVE_LZ4)
+  #include "lz4.h"
+  #include "lz4hc.h"
+#endif /* HAVE_LZ4 */
+#if defined(HAVE_SNAPPY)
+  #include "snappy-c.h"
+#endif /* HAVE_SNAPPY */
+#if defined(HAVE_ZLIB)
+  #include "zlib.h"
+#endif /* HAVE_ZLIB */
+#if defined(HAVE_ZSTD)
+  #include "zstd.h"
+#endif /* HAVE_ZSTD */
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+  #include <windows.h>
+  #include <malloc.h>
+
+  /* stdint.h only available in VS2010 (VC++ 16.0) and newer */
+  #if defined(_MSC_VER) && _MSC_VER < 1600
+    #include "win32/stdint-windows.h"
+  #else
+    #include <stdint.h>
+  #endif
+
+  #include <process.h>
+  #define getpid _getpid
+#else
+  #include <stdint.h>
+  #include <unistd.h>
+  #include <sys/time.h>
+#endif  /* _WIN32 */
+
+/* Include the win32/pthread.h library for all the Windows builds. See #224. */
+#if defined(_WIN32)
+  #include "win32/pthread.h"
+  #include "win32/pthread.c"
+#else
+  #include <pthread.h>
+#endif
+
+
+/* Some useful units */
+#define KB 1024
+#define MB (1024 * (KB))
+
+/* Minimum buffer size to be compressed */
+#define MIN_BUFFERSIZE 128       /* Cannot be smaller than 66 */
+
+/* The maximum number of splits in a block for compression */
+#define MAX_SPLITS 16            /* Cannot be larger than 128 */
+
+/* The size of L1 cache.  32 KB is quite common nowadays. */
+#define L1 (32 * (KB))
+
+/* We have problems using POSIX barriers when the symbol value is 200112L. */
+/* This requires more investigation, but the check below works for the moment. */
+#if defined(_POSIX_BARRIERS) && ( (_POSIX_BARRIERS - 20012L) >= 0 && _POSIX_BARRIERS != 200112L)
+#define _POSIX_BARRIERS_MINE
+#endif
+/* Synchronization variables */
+
+
+struct blosc_context {
+  int32_t compress;               /* 1 if we are doing compression, 0 if decompressing */
+
+  const uint8_t* src;
+  uint8_t* dest;                  /* The current pos in the destination buffer */
+  uint8_t* header_flags;          /* Flags for header */
+  int compversion;                /* Compressor version byte, only used during decompression */
+  int32_t sourcesize;             /* Number of bytes in source buffer (or uncompressed bytes in compressed file) */
+  int32_t nblocks;                /* Number of total blocks in buffer */
+  int32_t leftover;               /* Extra bytes at end of buffer */
+  int32_t blocksize;              /* Length of the block in bytes */
+  int32_t typesize;               /* Type size */
+  int32_t num_output_bytes;       /* Counter for the number of output bytes */
+  int32_t destsize;               /* Maximum size for destination buffer */
+  uint8_t* bstarts;               /* Start of the buffer past header info */
+  int32_t compcode;               /* Compressor code to use */
+  int clevel;                     /* Compression level (1-9) */
+
+  /* Threading */
+  int32_t numthreads;
+  int32_t threads_started;
+  int32_t end_threads;
+  pthread_t threads[BLOSC_MAX_THREADS];
+  int32_t tids[BLOSC_MAX_THREADS];
+  pthread_mutex_t count_mutex;
+  #ifdef _POSIX_BARRIERS_MINE
+  pthread_barrier_t barr_init;
+  pthread_barrier_t barr_finish;
+  #else
+  int32_t count_threads;
+  pthread_mutex_t count_threads_mutex;
+  pthread_cond_t count_threads_cv;
+  #endif
+  #if !defined(_WIN32)
+  pthread_attr_t ct_attr;         /* creation time attrs for threads */
+  #endif
+  int32_t thread_giveup_code;     /* error code when give up */
+  int32_t thread_nblock;          /* block counter */
+};
+
+struct thread_context {
+  struct blosc_context* parent_context;
+  int32_t tid;
+  uint8_t* tmp;
+  uint8_t* tmp2;
+  uint8_t* tmp3;
+  int32_t tmpblocksize;           /* Used to keep track of how big the
temporary buffers are */ +}; + +/* Global context for non-contextual API */ +static struct blosc_context* g_global_context; +static pthread_mutex_t global_comp_mutex; +static int32_t g_compressor = BLOSC_BLOSCLZ; /* the compressor to use by default */ +static int32_t g_threads = 1; +static int32_t g_force_blocksize = 0; +static int32_t g_initlib = 0; +static int32_t g_splitmode = BLOSC_FORWARD_COMPAT_SPLIT; + + + +/* Wrapped function to adjust the number of threads used by blosc */ +int blosc_set_nthreads_(struct blosc_context*); + +/* Releases the global threadpool */ +int blosc_release_threadpool(struct blosc_context* context); + +/* Macros for synchronization */ + +/* Wait until all threads are initialized */ +#ifdef _POSIX_BARRIERS_MINE +#define WAIT_INIT(RET_VAL, CONTEXT_PTR) \ + rc = pthread_barrier_wait(&CONTEXT_PTR->barr_init); \ + if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { \ + printf("Could not wait on barrier (init): %d\n", rc); \ + return((RET_VAL)); \ + } +#else +#define WAIT_INIT(RET_VAL, CONTEXT_PTR) \ + pthread_mutex_lock(&CONTEXT_PTR->count_threads_mutex); \ + if (CONTEXT_PTR->count_threads < CONTEXT_PTR->numthreads) { \ + CONTEXT_PTR->count_threads++; \ + pthread_cond_wait(&CONTEXT_PTR->count_threads_cv, &CONTEXT_PTR->count_threads_mutex); \ + } \ + else { \ + pthread_cond_broadcast(&CONTEXT_PTR->count_threads_cv); \ + } \ + pthread_mutex_unlock(&CONTEXT_PTR->count_threads_mutex); +#endif + +/* Wait for all threads to finish */ +#ifdef _POSIX_BARRIERS_MINE +#define WAIT_FINISH(RET_VAL, CONTEXT_PTR) \ + rc = pthread_barrier_wait(&CONTEXT_PTR->barr_finish); \ + if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { \ + printf("Could not wait on barrier (finish)\n"); \ + return((RET_VAL)); \ + } +#else +#define WAIT_FINISH(RET_VAL, CONTEXT_PTR) \ + pthread_mutex_lock(&CONTEXT_PTR->count_threads_mutex); \ + if (CONTEXT_PTR->count_threads > 0) { \ + CONTEXT_PTR->count_threads--; \ + pthread_cond_wait(&CONTEXT_PTR->count_threads_cv, &CONTEXT_PTR->count_threads_mutex); \ + } \ + else { \ + pthread_cond_broadcast(&CONTEXT_PTR->count_threads_cv); \ + } \ + pthread_mutex_unlock(&CONTEXT_PTR->count_threads_mutex); +#endif + + +/* A function for aligned malloc that is portable */ +static uint8_t *my_malloc(size_t size) +{ + void *block = NULL; + int res = 0; + +/* Do an alignment to 32 bytes because AVX2 is supported */ +#if defined(_WIN32) + /* A (void *) cast needed for avoiding a warning with MINGW :-/ */ + block = (void *)_aligned_malloc(size, 32); +#elif _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 + /* Platform does have an implementation of posix_memalign */ + res = posix_memalign(&block, 32, size); +#else + block = malloc(size); +#endif /* _WIN32 */ + + if (block == NULL || res != 0) { + printf("Error allocating memory!"); + return NULL; + } + + return (uint8_t *)block; +} + + +/* Release memory booked by my_malloc */ +static void my_free(void *block) +{ +#if defined(_WIN32) + _aligned_free(block); +#else + free(block); +#endif /* _WIN32 */ +} + + +/* Copy 4 bytes from `*pa` to int32_t, changing endianness if necessary. 
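sw32_() below probes the host's byte order at run time with a one-int trick and then copies the four bytes straight or reversed, so these header fields are always stored little-endian. An equivalent branch-free formulation, shown here as a sketch, assembles the value with shifts and is correct on any host:

    #include <stdio.h>
    #include <stdint.h>

    /* Reads a little-endian int32 no matter what the host byte order is. */
    static int32_t read_le32(const uint8_t *p) {
        return (int32_t)((uint32_t)p[0] | (uint32_t)p[1] << 8 |
                         (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
    }

    int main(void) {
        int probe = 1;                       /* same trick as sw32_() */
        const uint8_t buf[4] = { 0x01, 0x02, 0x03, 0x04 };
        printf("host is %s-endian\n",
               ((char *)&probe)[0] == 1 ? "little" : "big");
        printf("0x%08x\n", (unsigned)read_le32(buf));  /* 0x04030201 everywhere */
        return 0;
    }

The probe-and-copy style used by the library keeps the common little-endian case a plain four-byte copy.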
*/ +static int32_t sw32_(const uint8_t *pa) +{ + int32_t idest; + uint8_t *dest = (uint8_t *)&idest; + int i = 1; /* for big/little endian detection */ + char *p = (char *)&i; + + if (p[0] != 1) { + /* big endian */ + dest[0] = pa[3]; + dest[1] = pa[2]; + dest[2] = pa[1]; + dest[3] = pa[0]; + } + else { + /* little endian */ + dest[0] = pa[0]; + dest[1] = pa[1]; + dest[2] = pa[2]; + dest[3] = pa[3]; + } + return idest; +} + + +/* Copy 4 bytes from `*pa` to `*dest`, changing endianness if necessary. */ +static void _sw32(uint8_t* dest, int32_t a) +{ + uint8_t *pa = (uint8_t *)&a; + int i = 1; /* for big/little endian detection */ + char *p = (char *)&i; + + if (p[0] != 1) { + /* big endian */ + dest[0] = pa[3]; + dest[1] = pa[2]; + dest[2] = pa[1]; + dest[3] = pa[0]; + } + else { + /* little endian */ + dest[0] = pa[0]; + dest[1] = pa[1]; + dest[2] = pa[2]; + dest[3] = pa[3]; + } +} + + +/* + * Conversion routines between compressor and compression libraries + */ + +/* Return the library code associated with the compressor name */ +static int compname_to_clibcode(const char *compname) +{ + if (strcmp(compname, BLOSC_BLOSCLZ_COMPNAME) == 0) + return BLOSC_BLOSCLZ_LIB; + if (strcmp(compname, BLOSC_LZ4_COMPNAME) == 0) + return BLOSC_LZ4_LIB; + if (strcmp(compname, BLOSC_LZ4HC_COMPNAME) == 0) + return BLOSC_LZ4_LIB; + if (strcmp(compname, BLOSC_SNAPPY_COMPNAME) == 0) + return BLOSC_SNAPPY_LIB; + if (strcmp(compname, BLOSC_ZLIB_COMPNAME) == 0) + return BLOSC_ZLIB_LIB; + if (strcmp(compname, BLOSC_ZSTD_COMPNAME) == 0) + return BLOSC_ZSTD_LIB; + return -1; +} + +/* Return the library name associated with the compressor code */ +static const char *clibcode_to_clibname(int clibcode) +{ + if (clibcode == BLOSC_BLOSCLZ_LIB) return BLOSC_BLOSCLZ_LIBNAME; + if (clibcode == BLOSC_LZ4_LIB) return BLOSC_LZ4_LIBNAME; + if (clibcode == BLOSC_SNAPPY_LIB) return BLOSC_SNAPPY_LIBNAME; + if (clibcode == BLOSC_ZLIB_LIB) return BLOSC_ZLIB_LIBNAME; + if (clibcode == BLOSC_ZSTD_LIB) return BLOSC_ZSTD_LIBNAME; + return NULL; /* should never happen */ +} + + +/* + * Conversion routines between compressor names and compressor codes + */ + +/* Get the compressor name associated with the compressor code */ +int blosc_compcode_to_compname(int compcode, const char **compname) +{ + int code = -1; /* -1 means non-existent compressor code */ + const char *name = NULL; + + /* Map the compressor code */ + if (compcode == BLOSC_BLOSCLZ) + name = BLOSC_BLOSCLZ_COMPNAME; + else if (compcode == BLOSC_LZ4) + name = BLOSC_LZ4_COMPNAME; + else if (compcode == BLOSC_LZ4HC) + name = BLOSC_LZ4HC_COMPNAME; + else if (compcode == BLOSC_SNAPPY) + name = BLOSC_SNAPPY_COMPNAME; + else if (compcode == BLOSC_ZLIB) + name = BLOSC_ZLIB_COMPNAME; + else if (compcode == BLOSC_ZSTD) + name = BLOSC_ZSTD_COMPNAME; + + *compname = name; + + /* Guess if there is support for this code */ + if (compcode == BLOSC_BLOSCLZ) + code = BLOSC_BLOSCLZ; +#if defined(HAVE_LZ4) + else if (compcode == BLOSC_LZ4) + code = BLOSC_LZ4; + else if (compcode == BLOSC_LZ4HC) + code = BLOSC_LZ4HC; +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + else if (compcode == BLOSC_SNAPPY) + code = BLOSC_SNAPPY; +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + else if (compcode == BLOSC_ZLIB) + code = BLOSC_ZLIB; +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + else if (compcode == BLOSC_ZSTD) + code = BLOSC_ZSTD; +#endif /* HAVE_ZSTD */ + + return code; +} + +/* Get the compressor code for the compressor name. 
-1 if it is not available */ +int blosc_compname_to_compcode(const char *compname) +{ + int code = -1; /* -1 means non-existent compressor code */ + + if (strcmp(compname, BLOSC_BLOSCLZ_COMPNAME) == 0) { + code = BLOSC_BLOSCLZ; + } +#if defined(HAVE_LZ4) + else if (strcmp(compname, BLOSC_LZ4_COMPNAME) == 0) { + code = BLOSC_LZ4; + } + else if (strcmp(compname, BLOSC_LZ4HC_COMPNAME) == 0) { + code = BLOSC_LZ4HC; + } +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + else if (strcmp(compname, BLOSC_SNAPPY_COMPNAME) == 0) { + code = BLOSC_SNAPPY; + } +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + else if (strcmp(compname, BLOSC_ZLIB_COMPNAME) == 0) { + code = BLOSC_ZLIB; + } +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + else if (strcmp(compname, BLOSC_ZSTD_COMPNAME) == 0) { + code = BLOSC_ZSTD; + } +#endif /* HAVE_ZSTD */ + +return code; +} + + +#if defined(HAVE_LZ4) +static int lz4_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int accel) +{ + int cbytes; + cbytes = LZ4_compress_fast(input, output, (int)input_length, (int)maxout, + accel); + return cbytes; +} + +static int lz4hc_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int clevel) +{ + int cbytes; + if (input_length > (size_t)(2<<30)) + return -1; /* input larger than 1 GB is not supported */ + /* clevel for lz4hc goes up to 12, at least in LZ4 1.7.5 + * but levels larger than 9 does not buy much compression. */ + cbytes = LZ4_compress_HC(input, output, (int)input_length, (int)maxout, + clevel); + return cbytes; +} + +static int lz4_wrap_decompress(const char* input, size_t compressed_length, + char* output, size_t maxout) +{ + size_t cbytes; + cbytes = LZ4_decompress_fast(input, output, (int)maxout); + if (cbytes != compressed_length) { + return 0; + } + return (int)maxout; +} + +#endif /* HAVE_LZ4 */ + +#if defined(HAVE_SNAPPY) +static int snappy_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout) +{ + snappy_status status; + size_t cl = maxout; + status = snappy_compress(input, input_length, output, &cl); + if (status != SNAPPY_OK){ + return 0; + } + return (int)cl; +} + +static int snappy_wrap_decompress(const char* input, size_t compressed_length, + char* output, size_t maxout) +{ + snappy_status status; + size_t ul = maxout; + status = snappy_uncompress(input, compressed_length, output, &ul); + if (status != SNAPPY_OK){ + return 0; + } + return (int)ul; +} +#endif /* HAVE_SNAPPY */ + +#if defined(HAVE_ZLIB) +/* zlib is not very respectful with sharing name space with others. + Fortunately, its names do not collide with those already in blosc. */ +static int zlib_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int clevel) +{ + int status; + uLongf cl = maxout; + status = compress2( + (Bytef*)output, &cl, (Bytef*)input, (uLong)input_length, clevel); + if (status != Z_OK){ + return 0; + } + return (int)cl; +} + +static int zlib_wrap_decompress(const char* input, size_t compressed_length, + char* output, size_t maxout) +{ + int status; + uLongf ul = maxout; + status = uncompress( + (Bytef*)output, &ul, (Bytef*)input, (uLong)compressed_length); + if (status != Z_OK){ + return 0; + } + return (int)ul; +} +#endif /* HAVE_ZLIB */ + +#if defined(HAVE_ZSTD) +static int zstd_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int clevel) { + size_t code; + clevel = (clevel < 9) ? 
clevel * 2 - 1 : ZSTD_maxCLevel(); + /* Make the level 8 close enough to maxCLevel */ + if (clevel == 8) clevel = ZSTD_maxCLevel() - 2; + code = ZSTD_compress( + (void*)output, maxout, (void*)input, input_length, clevel); + if (ZSTD_isError(code)) { + return 0; + } + return (int)code; +} + +static int zstd_wrap_decompress(const char* input, size_t compressed_length, + char* output, size_t maxout) { + size_t code; + code = ZSTD_decompress( + (void*)output, maxout, (void*)input, compressed_length); + if (ZSTD_isError(code)) { + fprintf(stderr, "error decompressing with Zstd: %s \n", ZSTD_getErrorName(code)); + return 0; + } + return (int)code; +} +#endif /* HAVE_ZSTD */ + +/* Compute acceleration for blosclz */ +static int get_accel(const struct blosc_context* context) { + int32_t clevel = context->clevel; + + if (context->compcode == BLOSC_LZ4) { + /* This acceleration setting based on discussions held in: + * https://groups.google.com/forum/#!topic/lz4c/zosy90P8MQw + */ + return (10 - clevel); + } + return 1; +} + + +/* Shuffle & compress a single block */ +static int blosc_c(const struct blosc_context* context, int32_t blocksize, + int32_t leftoverblock, int32_t ntbytes, int32_t maxbytes, + const uint8_t *src, uint8_t *dest, uint8_t *tmp, + uint8_t *tmp2) +{ + int8_t header_flags = *(context->header_flags); + int dont_split = (header_flags & 0x10) >> 4; + int32_t j, neblock, nsplits; + int32_t cbytes; /* number of compressed bytes in split */ + int32_t ctbytes = 0; /* number of compressed bytes in block */ + int32_t maxout; + int32_t typesize = context->typesize; + const uint8_t *_tmp = src; + const char *compname; + int accel; + int bscount; + int doshuffle = (header_flags & BLOSC_DOSHUFFLE) && (typesize > 1); + int dobitshuffle = ((header_flags & BLOSC_DOBITSHUFFLE) && + (blocksize >= typesize)); + + if (doshuffle) { + /* Byte shuffling only makes sense if typesize > 1 */ + shuffle(typesize, blocksize, src, tmp); + _tmp = tmp; + } + /* We don't allow more than 1 filter at the same time (yet) */ + else if (dobitshuffle) { + bscount = bitshuffle(typesize, blocksize, src, tmp, tmp2); + if (bscount < 0) + return bscount; + _tmp = tmp; + } + + /* Calculate acceleration for different compressors */ + accel = get_accel(context); + + /* The number of splits for this block */ + if (!dont_split && !leftoverblock) { + nsplits = typesize; + } + else { + nsplits = 1; + } + neblock = blocksize / nsplits; + for (j = 0; j < nsplits; j++) { + dest += sizeof(int32_t); + ntbytes += (int32_t)sizeof(int32_t); + ctbytes += (int32_t)sizeof(int32_t); + maxout = neblock; + #if defined(HAVE_SNAPPY) + if (context->compcode == BLOSC_SNAPPY) { + /* TODO perhaps refactor this to keep the value stashed somewhere */ + maxout = snappy_max_compressed_length(neblock); + } + #endif /* HAVE_SNAPPY */ + if (ntbytes+maxout > maxbytes) { + maxout = maxbytes - ntbytes; /* avoid buffer overrun */ + if (maxout <= 0) { + return 0; /* non-compressible block */ + } + } + if (context->compcode == BLOSC_BLOSCLZ) { + cbytes = blosclz_compress(context->clevel, _tmp+j*neblock, neblock, + dest, maxout); + } + #if defined(HAVE_LZ4) + else if (context->compcode == BLOSC_LZ4) { + cbytes = lz4_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout, accel); + } + else if (context->compcode == BLOSC_LZ4HC) { + cbytes = lz4hc_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout, + context->clevel); + } + #endif /* HAVE_LZ4 */ + #if defined(HAVE_SNAPPY) + else if (context->compcode 
== BLOSC_SNAPPY) { + cbytes = snappy_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout); + } + #endif /* HAVE_SNAPPY */ + #if defined(HAVE_ZLIB) + else if (context->compcode == BLOSC_ZLIB) { + cbytes = zlib_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout, + context->clevel); + } + #endif /* HAVE_ZLIB */ + #if defined(HAVE_ZSTD) + else if (context->compcode == BLOSC_ZSTD) { + cbytes = zstd_wrap_compress((char*)_tmp + j * neblock, (size_t)neblock, + (char*)dest, (size_t)maxout, context->clevel); + } + #endif /* HAVE_ZSTD */ + + else { + blosc_compcode_to_compname(context->compcode, &compname); + fprintf(stderr, "Blosc has not been compiled with '%s' ", compname); + fprintf(stderr, "compression support. Please use one having it."); + return -5; /* signals no compression support */ + } + + if (cbytes > maxout) { + /* Buffer overrun caused by compression (should never happen) */ + return -1; + } + else if (cbytes < 0) { + /* cbytes should never be negative */ + return -2; + } + else if (cbytes == 0 || cbytes == neblock) { + /* The compressor has been unable to compress data at all. */ + /* Before doing the copy, check that we are not running into a + buffer overflow. */ + if ((ntbytes+neblock) > maxbytes) { + return 0; /* Non-compressible data */ + } + fastcopy(dest, _tmp + j * neblock, neblock); + cbytes = neblock; + } + _sw32(dest - 4, cbytes); + dest += cbytes; + ntbytes += cbytes; + ctbytes += cbytes; + } /* Closes j < nsplits */ + + return ctbytes; +} + +/* Decompress & unshuffle a single block */ +static int blosc_d(struct blosc_context* context, int32_t blocksize, + int32_t leftoverblock, const uint8_t *src, uint8_t *dest, + uint8_t *tmp, uint8_t *tmp2) +{ + int8_t header_flags = *(context->header_flags); + int32_t compformat = (header_flags & 0xe0) >> 5; + int dont_split = (header_flags & 0x10) >> 4; + int32_t j, neblock, nsplits; + int32_t nbytes; /* number of decompressed bytes in split */ + int32_t cbytes; /* number of compressed bytes in split */ + int32_t ctbytes = 0; /* number of compressed bytes in block */ + int32_t ntbytes = 0; /* number of uncompressed bytes in block */ + uint8_t *_tmp = dest; + int32_t typesize = context->typesize; + const char *compname; + int bscount; + int doshuffle = (header_flags & BLOSC_DOSHUFFLE) && (typesize > 1); + int dobitshuffle = ((header_flags & BLOSC_DOBITSHUFFLE) && + (blocksize >= typesize)); + int compversion = context->compversion; + + if (doshuffle || dobitshuffle) { + _tmp = tmp; + } + + /* The number of splits for this block */ + if (!dont_split && + /* For compatibility with before the introduction of the split flag */ + ((typesize <= MAX_SPLITS) && (blocksize/typesize) >= MIN_BUFFERSIZE) && + !leftoverblock) { + nsplits = typesize; + } + else { + nsplits = 1; + } + + neblock = blocksize / nsplits; + for (j = 0; j < nsplits; j++) { + cbytes = sw32_(src); /* amount of compressed bytes */ + src += sizeof(int32_t); + ctbytes += (int32_t)sizeof(int32_t); + /* Uncompress */ + if (cbytes == neblock) { + fastcopy(_tmp, src, neblock); + nbytes = neblock; + } + else { + if (compformat == BLOSC_BLOSCLZ_FORMAT) { + if (compversion != BLOSC_BLOSCLZ_VERSION_FORMAT) { + fprintf(stderr, "Unrecognized BloscLZ version %d\n", compversion); + return -9; + } + nbytes = blosclz_decompress(src, cbytes, _tmp, neblock); + } + #if defined(HAVE_LZ4) + else if (compformat == BLOSC_LZ4_FORMAT) { + if (compversion != BLOSC_LZ4_VERSION_FORMAT) { + fprintf(stderr, "Unrecognized LZ4 version 
%d\n", compversion); + return -9; + } + nbytes = lz4_wrap_decompress((char *)src, (size_t)cbytes, + (char*)_tmp, (size_t)neblock); + } + #endif /* HAVE_LZ4 */ + #if defined(HAVE_SNAPPY) + else if (compformat == BLOSC_SNAPPY_FORMAT) { + if (compversion != BLOSC_SNAPPY_VERSION_FORMAT) { + fprintf(stderr, "Unrecognized Snappy version %d\n", compversion); + return -9; + } + nbytes = snappy_wrap_decompress((char *)src, (size_t)cbytes, + (char*)_tmp, (size_t)neblock); + } + #endif /* HAVE_SNAPPY */ + #if defined(HAVE_ZLIB) + else if (compformat == BLOSC_ZLIB_FORMAT) { + if (compversion != BLOSC_ZLIB_VERSION_FORMAT) { + fprintf(stderr, "Unrecognized Zlib version %d\n", compversion); + return -9; + } + nbytes = zlib_wrap_decompress((char *)src, (size_t)cbytes, + (char*)_tmp, (size_t)neblock); + } + #endif /* HAVE_ZLIB */ + #if defined(HAVE_ZSTD) + else if (compformat == BLOSC_ZSTD_FORMAT) { + if (compversion != BLOSC_ZSTD_VERSION_FORMAT) { + fprintf(stderr, "Unrecognized Zstd version %d\n", compversion); + return -9; + } + nbytes = zstd_wrap_decompress((char*)src, (size_t)cbytes, + (char*)_tmp, (size_t)neblock); + } + #endif /* HAVE_ZSTD */ + else { + compname = clibcode_to_clibname(compformat); + fprintf(stderr, + "Blosc has not been compiled with decompression " + "support for '%s' format. ", compname); + fprintf(stderr, "Please recompile for adding this support.\n"); + return -5; /* signals no decompression support */ + } + + /* Check that decompressed bytes number is correct */ + if (nbytes != neblock) { + return -2; + } + + } + src += cbytes; + ctbytes += cbytes; + _tmp += nbytes; + ntbytes += nbytes; + } /* Closes j < nsplits */ + + if (doshuffle) { + unshuffle(typesize, blocksize, tmp, dest); + } + else if (dobitshuffle) { + bscount = bitunshuffle(typesize, blocksize, tmp, dest, tmp2); + if (bscount < 0) + return bscount; + } + + /* Return the number of uncompressed bytes */ + return ntbytes; +} + + +/* Serial version for compression/decompression */ +static int serial_blosc(struct blosc_context* context) +{ + int32_t j, bsize, leftoverblock; + int32_t cbytes; + + int32_t ebsize = context->blocksize + context->typesize * (int32_t)sizeof(int32_t); + int32_t ntbytes = context->num_output_bytes; + + uint8_t *tmp = my_malloc(context->blocksize + ebsize); + uint8_t *tmp2 = tmp + context->blocksize; + + for (j = 0; j < context->nblocks; j++) { + if (context->compress && !(*(context->header_flags) & BLOSC_MEMCPYED)) { + _sw32(context->bstarts + j * 4, ntbytes); + } + bsize = context->blocksize; + leftoverblock = 0; + if ((j == context->nblocks - 1) && (context->leftover > 0)) { + bsize = context->leftover; + leftoverblock = 1; + } + if (context->compress) { + if (*(context->header_flags) & BLOSC_MEMCPYED) { + /* We want to memcpy only */ + fastcopy(context->dest + BLOSC_MAX_OVERHEAD + j * context->blocksize, + context->src + j * context->blocksize, bsize); + cbytes = bsize; + } + else { + /* Regular compression */ + cbytes = blosc_c(context, bsize, leftoverblock, ntbytes, + context->destsize, context->src+j*context->blocksize, + context->dest+ntbytes, tmp, tmp2); + if (cbytes == 0) { + ntbytes = 0; /* uncompressible data */ + break; + } + } + } + else { + if (*(context->header_flags) & BLOSC_MEMCPYED) { + /* We want to memcpy only */ + fastcopy(context->dest + j * context->blocksize, + context->src + BLOSC_MAX_OVERHEAD + j * context->blocksize, bsize); + cbytes = bsize; + } + else { + /* Regular decompression */ + cbytes = blosc_d(context, bsize, leftoverblock, + context->src + 
sw32_(context->bstarts + j * 4),
+                         context->dest+j*context->blocksize, tmp, tmp2);
+      }
+    }
+    if (cbytes < 0) {
+      ntbytes = cbytes;         /* error in blosc_c or blosc_d */
+      break;
+    }
+    ntbytes += cbytes;
+  }
+
+  /* Free temporaries */
+  my_free(tmp);
+
+  return ntbytes;
+}
+
+
+/* Threaded version for compression/decompression */
+static int parallel_blosc(struct blosc_context* context)
+{
+  int rc;
+
+  /* Check whether we need to restart threads */
+  blosc_set_nthreads_(context);
+
+  /* Set sentinels */
+  context->thread_giveup_code = 1;
+  context->thread_nblock = -1;
+
+  /* Synchronization point for all threads (wait for initialization) */
+  WAIT_INIT(-1, context);
+
+  /* Synchronization point for all threads (wait for finalization) */
+  WAIT_FINISH(-1, context);
+
+  if (context->thread_giveup_code > 0) {
+    /* Return the total bytes (de-)compressed in threads */
+    return context->num_output_bytes;
+  }
+  else {
+    /* Compression/decompression gave up.  Return error code. */
+    return context->thread_giveup_code;
+  }
+}
+
+
+/* Do the compression or decompression of the buffer depending on the
+   global params. */
+static int do_job(struct blosc_context* context)
+{
+  int32_t ntbytes;
+
+  /* Run the serial version when nthreads is 1 or when the buffers are
+     not much larger than blocksize */
+  if (context->numthreads == 1 || (context->sourcesize / context->blocksize) <= 1) {
+    ntbytes = serial_blosc(context);
+  }
+  else {
+    ntbytes = parallel_blosc(context);
+  }
+
+  return ntbytes;
+}
+
+
+/* Whether a codec is meant for High Compression Ratios */
+#define HCR(codec) ( \
+          ((codec) == BLOSC_LZ4HC) || \
+          ((codec) == BLOSC_ZLIB) || \
+          ((codec) == BLOSC_ZSTD) ? 1 : 0 )
+
+
+/* Conditions for splitting a block before compressing with a codec. */
+static int split_block(int compcode, int typesize, int blocksize) {
+  int splitblock = -1;
+
+  switch (g_splitmode) {
+    case BLOSC_ALWAYS_SPLIT:
+      splitblock = 1;
+      break;
+    case BLOSC_NEVER_SPLIT:
+      splitblock = 0;
+      break;
+    case BLOSC_AUTO_SPLIT:
+      /* Normally all the compressors designed for speed benefit from a
+         split.  However, in conducted benchmarks LZ4 seems to run faster
+         if we don't split, which is quite surprising. */
+      splitblock = (((compcode == BLOSC_BLOSCLZ) ||
+                     (compcode == BLOSC_SNAPPY)) &&
+                    (typesize <= MAX_SPLITS) &&
+                    (blocksize / typesize) >= MIN_BUFFERSIZE);
+      break;
+    case BLOSC_FORWARD_COMPAT_SPLIT:
+      /* The zstd support was introduced at the same time as the split flag,
+       * so there should not be a problem with not splitting blocks with it. */
+      splitblock = ((compcode != BLOSC_ZSTD) &&
+                    (typesize <= MAX_SPLITS) &&
+                    (blocksize / typesize) >= MIN_BUFFERSIZE);
+      break;
+    default:
+      fprintf(stderr, "Split mode %d not supported", g_splitmode);
+  }
+  return splitblock;
+}
+
+
+static int32_t compute_blocksize(struct blosc_context* context, int32_t clevel,
+                                 int32_t typesize, int32_t nbytes,
+                                 int32_t forced_blocksize)
+{
+  int32_t blocksize;
+
+  /* Protection against very small buffers */
+  if (nbytes < (int32_t)typesize) {
+    return 1;
+  }
+
+  blocksize = nbytes;           /* Start with a whole buffer as blocksize */
+
+  if (forced_blocksize) {
+    blocksize = forced_blocksize;
+    /* Check that forced blocksize is not too small */
+    if (blocksize < MIN_BUFFERSIZE) {
+      blocksize = MIN_BUFFERSIZE;
+    }
+  }
+  else if (nbytes >= L1) {
+    blocksize = L1;
+
+    /* For HCR codecs, increase the block sizes by a factor of 2 because they
+       are meant for compressing large blocks (i.e. they show a big overhead
+       when compressing small ones.
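To make the FORWARD_COMPAT rule concrete, here is a sketch with the constants inlined (MAX_SPLITS = 16, MIN_BUFFERSIZE = 128) and stand-in codec codes; it mirrors the logic of split_block() above without being the library's code:

    #include <stdio.h>

    enum { MY_BLOSCLZ, MY_ZSTD };   /* stand-ins for the BLOSC_* codes */

    static int compat_split(int compcode, int typesize, int blocksize) {
        return compcode != MY_ZSTD && typesize <= 16 &&
               blocksize / typesize >= 128;
    }

    int main(void) {
        printf("%d\n", compat_split(MY_BLOSCLZ,  4, 32 * 1024)); /* 1: split */
        printf("%d\n", compat_split(MY_ZSTD,     4, 32 * 1024)); /* 0: zstd never splits */
        printf("%d\n", compat_split(MY_BLOSCLZ, 32, 32 * 1024)); /* 0: typesize too big */
        return 0;
    }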
*/ + if (HCR(context->compcode)) { + blocksize *= 2; + } + + switch (clevel) { + case 0: + /* Case of plain copy */ + blocksize /= 4; + break; + case 1: + blocksize /= 2; + break; + case 2: + blocksize *= 1; + break; + case 3: + blocksize *= 2; + break; + case 4: + case 5: + blocksize *= 4; + break; + case 6: + case 7: + case 8: + blocksize *= 8; + break; + case 9: + blocksize *= 8; + if (HCR(context->compcode)) { + blocksize *= 2; + } + break; + default: + assert(0); + break; + } + } + + /* Enlarge the blocksize for splittable codecs */ + if (clevel > 0 && split_block(context->compcode, typesize, blocksize)) { + if (blocksize > (1 << 16)) { + /* Do not use a too large split buffer (> 64 KB) for splitting codecs */ + blocksize = (1 << 16); + } + blocksize *= typesize; + if (blocksize < (1 << 16)) { + /* Do not use a too small blocksize (< 64 KB) when typesize is small */ + blocksize = (1 << 16); + } + } + + /* Check that blocksize is not too large */ + if (blocksize > (int32_t)nbytes) { + blocksize = nbytes; + } + + /* blocksize *must absolutely* be a multiple of the typesize */ + if (blocksize > typesize) { + blocksize = blocksize / typesize * typesize; + } + + return blocksize; +} + +static int initialize_context_compression(struct blosc_context* context, + int clevel, + int doshuffle, + size_t typesize, + size_t sourcesize, + const void* src, + void* dest, + size_t destsize, + int32_t compressor, + int32_t blocksize, + int32_t numthreads) +{ + /* Set parameters */ + context->compress = 1; + context->src = (const uint8_t*)src; + context->dest = (uint8_t *)(dest); + context->num_output_bytes = 0; + context->destsize = (int32_t)destsize; + context->sourcesize = sourcesize; + context->typesize = typesize; + context->compcode = compressor; + context->numthreads = numthreads; + context->end_threads = 0; + context->clevel = clevel; + + /* Check buffer size limits */ + if (sourcesize > BLOSC_MAX_BUFFERSIZE) { + /* If buffer is too large, give up. */ + fprintf(stderr, "Input buffer size cannot exceed %d bytes\n", + BLOSC_MAX_BUFFERSIZE); + return -1; + } + + /* Compression level */ + if (clevel < 0 || clevel > 9) { + /* If clevel not in 0..9, print an error */ + fprintf(stderr, "`clevel` parameter must be between 0 and 9!\n"); + return -10; + } + + /* Shuffle */ + if (doshuffle != 0 && doshuffle != 1 && doshuffle != 2) { + fprintf(stderr, "`shuffle` parameter must be either 0, 1 or 2!\n"); + return -10; + } + + /* Check typesize limits */ + if (context->typesize > BLOSC_MAX_TYPESIZE) { + /* If typesize is too large, treat buffer as an 1-byte stream. */ + context->typesize = 1; + } + + /* Get the blocksize */ + context->blocksize = compute_blocksize(context, clevel, (int32_t)context->typesize, context->sourcesize, blocksize); + + /* Compute number of blocks in buffer */ + context->nblocks = context->sourcesize / context->blocksize; + context->leftover = context->sourcesize % context->blocksize; + context->nblocks = (context->leftover > 0) ? 
(context->nblocks + 1) : context->nblocks; + + return 1; +} + + +static int write_compression_header(struct blosc_context* context, int clevel, int doshuffle) +{ + int32_t compformat; + int dont_split; + + /* Write version header for this block */ + context->dest[0] = BLOSC_VERSION_FORMAT; /* blosc format version */ + + /* Write compressor format */ + compformat = -1; + switch (context->compcode) + { + case BLOSC_BLOSCLZ: + compformat = BLOSC_BLOSCLZ_FORMAT; + context->dest[1] = BLOSC_BLOSCLZ_VERSION_FORMAT; /* blosclz format version */ + break; + +#if defined(HAVE_LZ4) + case BLOSC_LZ4: + compformat = BLOSC_LZ4_FORMAT; + context->dest[1] = BLOSC_LZ4_VERSION_FORMAT; /* lz4 format version */ + break; + case BLOSC_LZ4HC: + compformat = BLOSC_LZ4HC_FORMAT; + context->dest[1] = BLOSC_LZ4HC_VERSION_FORMAT; /* lz4hc is the same as lz4 */ + break; +#endif /* HAVE_LZ4 */ + +#if defined(HAVE_SNAPPY) + case BLOSC_SNAPPY: + compformat = BLOSC_SNAPPY_FORMAT; + context->dest[1] = BLOSC_SNAPPY_VERSION_FORMAT; /* snappy format version */ + break; +#endif /* HAVE_SNAPPY */ + +#if defined(HAVE_ZLIB) + case BLOSC_ZLIB: + compformat = BLOSC_ZLIB_FORMAT; + context->dest[1] = BLOSC_ZLIB_VERSION_FORMAT; /* zlib format version */ + break; +#endif /* HAVE_ZLIB */ + +#if defined(HAVE_ZSTD) + case BLOSC_ZSTD: + compformat = BLOSC_ZSTD_FORMAT; + context->dest[1] = BLOSC_ZSTD_VERSION_FORMAT; /* zstd format version */ + break; +#endif /* HAVE_ZSTD */ + + default: + { + const char *compname; + compname = clibcode_to_clibname(compformat); + fprintf(stderr, "Blosc has not been compiled with '%s' ", compname); + fprintf(stderr, "compression support. Please use one having it."); + return -5; /* signals no compression support */ + break; + } + } + + context->header_flags = context->dest+2; /* flags */ + context->dest[2] = 0; /* zeroes flags */ + context->dest[3] = (uint8_t)context->typesize; /* type size */ + _sw32(context->dest + 4, context->sourcesize); /* size of the buffer */ + _sw32(context->dest + 8, context->blocksize); /* block size */ + context->bstarts = context->dest + 16; /* starts for every block */ + context->num_output_bytes = 16 + sizeof(int32_t)*context->nblocks; /* space for header and pointers */ + + if (context->clevel == 0) { + /* Compression level 0 means buffer to be memcpy'ed */ + *(context->header_flags) |= BLOSC_MEMCPYED; + context->num_output_bytes = 16; /* space just for header */ + } + + if (context->sourcesize < MIN_BUFFERSIZE) { + /* Buffer is too small. Try memcpy'ing. 
*/ + *(context->header_flags) |= BLOSC_MEMCPYED; + context->num_output_bytes = 16; /* space just for header */ + } + + if (doshuffle == BLOSC_SHUFFLE) { + /* Byte-shuffle is active */ + *(context->header_flags) |= BLOSC_DOSHUFFLE; /* bit 0 set to one in flags */ + } + + if (doshuffle == BLOSC_BITSHUFFLE) { + /* Bit-shuffle is active */ + *(context->header_flags) |= BLOSC_DOBITSHUFFLE; /* bit 2 set to one in flags */ + } + + dont_split = !split_block(context->compcode, context->typesize, + context->blocksize); + *(context->header_flags) |= dont_split << 4; /* dont_split is in bit 4 */ + *(context->header_flags) |= compformat << 5; /* compressor format starts at bit 5 */ + + return 1; +} + + +int blosc_compress_context(struct blosc_context* context) +{ + int32_t ntbytes = 0; + + if ((*(context->header_flags) & BLOSC_MEMCPYED) && + (context->sourcesize + BLOSC_MAX_OVERHEAD > context->destsize)) { + return 0; /* data cannot be copied without overrun destination */ + } + + /* Do the actual compression */ + ntbytes = do_job(context); + if (ntbytes < 0) { + return -1; + } + if ((ntbytes == 0) && (context->sourcesize + BLOSC_MAX_OVERHEAD <= context->destsize)) { + /* Last chance for fitting `src` buffer in `dest`. Update flags and force a copy. */ + *(context->header_flags) |= BLOSC_MEMCPYED; + context->num_output_bytes = BLOSC_MAX_OVERHEAD; /* reset the output bytes in previous step */ + ntbytes = do_job(context); + if (ntbytes < 0) { + return -1; + } + } + + /* Set the number of compressed bytes in header */ + _sw32(context->dest + 12, ntbytes); + + assert(ntbytes <= context->destsize); + return ntbytes; +} + +/* The public routine for compression with context. */ +int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize, + size_t nbytes, const void* src, void* dest, + size_t destsize, const char* compressor, + size_t blocksize, int numinternalthreads) +{ + int error, result; + struct blosc_context context; + + context.threads_started = 0; + error = initialize_context_compression(&context, clevel, doshuffle, typesize, + nbytes, src, dest, destsize, + blosc_compname_to_compcode(compressor), + blocksize, numinternalthreads); + if (error < 0) { return error; } + + error = write_compression_header(&context, clevel, doshuffle); + if (error < 0) { return error; } + + result = blosc_compress_context(&context); + + if (numinternalthreads > 1) + { + blosc_release_threadpool(&context); + } + + return result; +} + +/* The public routine for compression. See blosc.h for docstrings. 
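The 16-byte container header assembled by write_compression_header(), and completed by blosc_compress_context() storing the compressed size at offset 12, can be decoded with nothing but the layout above. A sketch of a reader follows; the sample bytes are made-up values, and all multi-byte fields are little-endian:

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t le32(const uint8_t *p) {
        return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
               (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
    }

    static void dump_header(const uint8_t *h) {
        printf("format version : %u\n", (unsigned)h[0]);
        printf("codec version  : %u\n", (unsigned)h[1]);
        printf("flags          : 0x%02x\n", (unsigned)h[2]);
        printf("typesize       : %u\n", (unsigned)h[3]);
        printf("nbytes         : %u\n", (unsigned)le32(h + 4));  /* uncompressed */
        printf("blocksize      : %u\n", (unsigned)le32(h + 8));
        printf("cbytes         : %u\n", (unsigned)le32(h + 12)); /* compressed */
    }

    int main(void) {
        /* Made-up sample: format 2, codec version 1, byte-shuffle flag,
           typesize 4, nbytes 4096, blocksize 4096, cbytes 932. */
        const uint8_t h[16] = { 2, 1, 0x01, 4,
                                0x00, 0x10, 0x00, 0x00,
                                0x00, 0x10, 0x00, 0x00,
                                0xa4, 0x03, 0x00, 0x00 };
        dump_header(h);
        return 0;
    }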
 */
+int blosc_compress(int clevel, int doshuffle, size_t typesize, size_t nbytes,
+                   const void *src, void *dest, size_t destsize)
+{
+  int error;
+  int result;
+  char* envvar;
+
+  /* Check if we should initialize the library */
+  if (!g_initlib) blosc_init();
+
+  /* Check for a BLOSC_CLEVEL environment variable */
+  envvar = getenv("BLOSC_CLEVEL");
+  if (envvar != NULL) {
+    long value;
+    value = strtol(envvar, NULL, 10);
+    if ((value != EINVAL) && (value >= 0)) {
+      clevel = (int)value;
+    }
+  }
+
+  /* Check for a BLOSC_SHUFFLE environment variable */
+  envvar = getenv("BLOSC_SHUFFLE");
+  if (envvar != NULL) {
+    if (strcmp(envvar, "NOSHUFFLE") == 0) {
+      doshuffle = BLOSC_NOSHUFFLE;
+    }
+    if (strcmp(envvar, "SHUFFLE") == 0) {
+      doshuffle = BLOSC_SHUFFLE;
+    }
+    if (strcmp(envvar, "BITSHUFFLE") == 0) {
+      doshuffle = BLOSC_BITSHUFFLE;
+    }
+  }
+
+  /* Check for a BLOSC_TYPESIZE environment variable */
+  envvar = getenv("BLOSC_TYPESIZE");
+  if (envvar != NULL) {
+    long value;
+    value = strtol(envvar, NULL, 10);
+    if ((value != EINVAL) && (value > 0)) {
+      typesize = (int)value;
+    }
+  }
+
+  /* Check for a BLOSC_COMPRESSOR environment variable */
+  envvar = getenv("BLOSC_COMPRESSOR");
+  if (envvar != NULL) {
+    result = blosc_set_compressor(envvar);
+    if (result < 0) { return result; }
+  }
+
+  /* Check for a BLOSC_BLOCKSIZE environment variable */
+  envvar = getenv("BLOSC_BLOCKSIZE");
+  if (envvar != NULL) {
+    long blocksize;
+    blocksize = strtol(envvar, NULL, 10);
+    if ((blocksize != EINVAL) && (blocksize > 0)) {
+      blosc_set_blocksize((size_t)blocksize);
+    }
+  }
+
+  /* Check for a BLOSC_NTHREADS environment variable */
+  envvar = getenv("BLOSC_NTHREADS");
+  if (envvar != NULL) {
+    long nthreads;
+    nthreads = strtol(envvar, NULL, 10);
+    if ((nthreads != EINVAL) && (nthreads > 0)) {
+      result = blosc_set_nthreads((int)nthreads);
+      if (result < 0) { return result; }
+    }
+  }
+
+  /* Check for a BLOSC_SPLITMODE environment variable */
+  envvar = getenv("BLOSC_SPLITMODE");
+  if (envvar != NULL) {
+    if (strcmp(envvar, "FORWARD_COMPAT") == 0) {
+      blosc_set_splitmode(BLOSC_FORWARD_COMPAT_SPLIT);
+    }
+    else if (strcmp(envvar, "AUTO") == 0) {
+      blosc_set_splitmode(BLOSC_AUTO_SPLIT);
+    }
+    else if (strcmp(envvar, "ALWAYS") == 0) {
+      blosc_set_splitmode(BLOSC_ALWAYS_SPLIT);
+    }
+    else if (strcmp(envvar, "NEVER") == 0) {
+      blosc_set_splitmode(BLOSC_NEVER_SPLIT);
+    }
+    else {
+      fprintf(stderr, "BLOSC_SPLITMODE environment variable '%s' not recognized\n", envvar);
+      return -1;
+    }
+  }
+
+  /* Check for a BLOSC_NOLOCK environment variable.
It is important + that this should be the last env var so that it can take the + previous ones into account */ + envvar = getenv("BLOSC_NOLOCK"); + if (envvar != NULL) { + const char *compname; + blosc_compcode_to_compname(g_compressor, &compname); + result = blosc_compress_ctx(clevel, doshuffle, typesize, + nbytes, src, dest, destsize, + compname, g_force_blocksize, g_threads); + return result; + } + + pthread_mutex_lock(&global_comp_mutex); + + error = initialize_context_compression(g_global_context, clevel, doshuffle, + typesize, nbytes, src, dest, destsize, + g_compressor, g_force_blocksize, + g_threads); + if (error < 0) { return error; } + + error = write_compression_header(g_global_context, clevel, doshuffle); + if (error < 0) { return error; } + + result = blosc_compress_context(g_global_context); + + pthread_mutex_unlock(&global_comp_mutex); + + return result; +} + +int blosc_run_decompression_with_context(struct blosc_context* context, + const void* src, + void* dest, + size_t destsize, + int numinternalthreads) +{ + uint8_t version; + int32_t ntbytes; + + context->compress = 0; + context->src = (const uint8_t*)src; + context->dest = (uint8_t*)dest; + context->destsize = destsize; + context->num_output_bytes = 0; + context->numthreads = numinternalthreads; + context->end_threads = 0; + + /* Read the header block */ + version = context->src[0]; /* blosc format version */ + context->compversion = context->src[1]; + + context->header_flags = (uint8_t*)(context->src + 2); /* flags */ + context->typesize = (int32_t)context->src[3]; /* typesize */ + context->sourcesize = sw32_(context->src + 4); /* buffer size */ + context->blocksize = sw32_(context->src + 8); /* block size */ + + if (version != BLOSC_VERSION_FORMAT) { + /* Version from future */ + return -1; + } + if (*context->header_flags & 0x08) { + /* compressor flags from the future */ + return -1; + } + + context->bstarts = (uint8_t*)(context->src + 16); + /* Compute some params */ + /* Total blocks */ + context->nblocks = context->sourcesize / context->blocksize; + context->leftover = context->sourcesize % context->blocksize; + context->nblocks = (context->leftover>0)? context->nblocks+1: context->nblocks; + + /* Check that we have enough space to decompress */ + if (context->sourcesize > (int32_t)destsize) { + return -1; + } + + /* Do the actual decompression */ + ntbytes = do_job(context); + if (ntbytes < 0) { + return -1; + } + + assert(ntbytes <= (int32_t)destsize); + return ntbytes; +} + +/* The public routine for decompression with context. */ +int blosc_decompress_ctx(const void *src, void *dest, size_t destsize, + int numinternalthreads) +{ + int result; + struct blosc_context context; + + context.threads_started = 0; + result = blosc_run_decompression_with_context(&context, src, dest, destsize, numinternalthreads); + + if (numinternalthreads > 1) + { + blosc_release_threadpool(&context); + } + + return result; +} + + +/* The public routine for decompression. See blosc.h for docstrings. */ +int blosc_decompress(const void *src, void *dest, size_t destsize) +{ + int result; + char* envvar; + long nthreads; + + /* Check if should initialize */ + if (!g_initlib) blosc_init(); + + /* Check for a BLOSC_NTHREADS environment variable */ + envvar = getenv("BLOSC_NTHREADS"); + if (envvar != NULL) { + nthreads = strtol(envvar, NULL, 10); + if ((nthreads != EINVAL) && (nthreads > 0)) { + result = blosc_set_nthreads((int)nthreads); + if (result < 0) { return result; } + } + } + + /* Check for a BLOSC_NOLOCK environment variable. 
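+     Setting it (to any value) makes this call go through
+     blosc_decompress_ctx(), bypassing the global lock.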
It is important + that this should be the last env var so that it can take the + previous ones into account */ + envvar = getenv("BLOSC_NOLOCK"); + if (envvar != NULL) { + result = blosc_decompress_ctx(src, dest, destsize, g_threads); + return result; + } + + pthread_mutex_lock(&global_comp_mutex); + + result = blosc_run_decompression_with_context(g_global_context, src, dest, + destsize, g_threads); + + pthread_mutex_unlock(&global_comp_mutex); + + return result; +} + + +/* Specific routine optimized for decompression a small number of + items out of a compressed chunk. This does not use threads because + it would affect negatively to performance. */ +int blosc_getitem(const void *src, int start, int nitems, void *dest) +{ + uint8_t *_src=NULL; /* current pos for source buffer */ + uint8_t version, compversion; /* versions for compressed header */ + uint8_t flags; /* flags for header */ + int32_t ntbytes = 0; /* the number of uncompressed bytes */ + int32_t nblocks; /* number of total blocks in buffer */ + int32_t leftover; /* extra bytes at end of buffer */ + uint8_t *bstarts; /* start pointers for each block */ + int32_t typesize, blocksize, nbytes; + int32_t j, bsize, bsize2, leftoverblock; + int32_t cbytes, startb, stopb; + int stop = start + nitems; + uint8_t *tmp; + uint8_t *tmp2; + uint8_t *tmp3; + int32_t ebsize; + + _src = (uint8_t *)(src); + + /* Read the header block */ + version = _src[0]; /* blosc format version */ + compversion = _src[1]; + flags = _src[2]; /* flags */ + typesize = (int32_t)_src[3]; /* typesize */ + nbytes = sw32_(_src + 4); /* buffer size */ + blocksize = sw32_(_src + 8); /* block size */ + + if (version != BLOSC_VERSION_FORMAT) + return -9; + + ebsize = blocksize + typesize * (int32_t)sizeof(int32_t); + tmp = my_malloc(blocksize + ebsize + blocksize); + tmp2 = tmp + blocksize; + tmp3 = tmp + blocksize + ebsize; + + _src += 16; + bstarts = _src; + /* Compute some params */ + /* Total blocks */ + nblocks = nbytes / blocksize; + leftover = nbytes % blocksize; + nblocks = (leftover>0)? nblocks+1: nblocks; + _src += sizeof(int32_t)*nblocks; + + /* Check region boundaries */ + if ((start < 0) || (start*typesize > nbytes)) { + fprintf(stderr, "`start` out of bounds"); + return -1; + } + + if ((stop < 0) || (stop*typesize > nbytes)) { + fprintf(stderr, "`start`+`nitems` out of bounds"); + return -1; + } + + for (j = 0; j < nblocks; j++) { + bsize = blocksize; + leftoverblock = 0; + if ((j == nblocks - 1) && (leftover > 0)) { + bsize = leftover; + leftoverblock = 1; + } + + /* Compute start & stop for each block */ + startb = start * typesize - j * blocksize; + stopb = stop * typesize - j * blocksize; + if ((startb >= (int)blocksize) || (stopb <= 0)) { + continue; + } + if (startb < 0) { + startb = 0; + } + if (stopb > (int)blocksize) { + stopb = blocksize; + } + bsize2 = stopb - startb; + + /* Do the actual data copy */ + if (flags & BLOSC_MEMCPYED) { + /* We want to memcpy only */ + fastcopy((uint8_t *) dest + ntbytes, + (uint8_t *) src + BLOSC_MAX_OVERHEAD + j * blocksize + startb, bsize2); + cbytes = bsize2; + } + else { + struct blosc_context context = {0}; + /* Only initialize the fields blosc_d uses */ + context.typesize = typesize; + context.header_flags = &flags; + context.compversion = compversion; + + /* Regular decompression. Put results in tmp2. 
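+         `bstarts + j * 4` holds the little-endian start offset of block j
+         within `src`, so blosc_d() reads the compressed block in place.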
*/ + cbytes = blosc_d(&context, bsize, leftoverblock, + (uint8_t *)src + sw32_(bstarts + j * 4), + tmp2, tmp, tmp3); + if (cbytes < 0) { + ntbytes = cbytes; + break; + } + /* Copy to destination */ + fastcopy((uint8_t *) dest + ntbytes, tmp2 + startb, bsize2); + cbytes = bsize2; + } + ntbytes += cbytes; + } + + my_free(tmp); + + return ntbytes; +} + + +/* Decompress & unshuffle several blocks in a single thread */ +static void *t_blosc(void *ctxt) +{ + struct thread_context* context = (struct thread_context*)ctxt; + int32_t cbytes, ntdest; + int32_t tblocks; /* number of blocks per thread */ + int32_t leftover2; + int32_t tblock; /* limit block on a thread */ + int32_t nblock_; /* private copy of nblock */ + int32_t bsize, leftoverblock; + /* Parameters for threads */ + int32_t blocksize; + int32_t ebsize; + int32_t compress; + int32_t maxbytes; + int32_t ntbytes; + int32_t flags; + int32_t nblocks; + int32_t leftover; + uint8_t *bstarts; + const uint8_t *src; + uint8_t *dest; + uint8_t *tmp; + uint8_t *tmp2; + uint8_t *tmp3; + int rc; + + while(1) + { + /* Synchronization point for all threads (wait for initialization) */ + WAIT_INIT(NULL, context->parent_context); + + if(context->parent_context->end_threads) + { + break; + } + + /* Get parameters for this thread before entering the main loop */ + blocksize = context->parent_context->blocksize; + ebsize = blocksize + context->parent_context->typesize * (int32_t)sizeof(int32_t); + compress = context->parent_context->compress; + flags = *(context->parent_context->header_flags); + maxbytes = context->parent_context->destsize; + nblocks = context->parent_context->nblocks; + leftover = context->parent_context->leftover; + bstarts = context->parent_context->bstarts; + src = context->parent_context->src; + dest = context->parent_context->dest; + + if (blocksize > context->tmpblocksize) + { + my_free(context->tmp); + context->tmp = my_malloc(blocksize + ebsize + blocksize); + context->tmp2 = context->tmp + blocksize; + context->tmp3 = context->tmp + blocksize + ebsize; + } + + tmp = context->tmp; + tmp2 = context->tmp2; + tmp3 = context->tmp3; + + ntbytes = 0; /* only useful for decompression */ + + if (compress && !(flags & BLOSC_MEMCPYED)) { + /* Compression always has to follow the block order */ + pthread_mutex_lock(&context->parent_context->count_mutex); + context->parent_context->thread_nblock++; + nblock_ = context->parent_context->thread_nblock; + pthread_mutex_unlock(&context->parent_context->count_mutex); + tblock = nblocks; + } + else { + /* Decompression can happen using any order. We choose + sequential block order on each thread */ + + /* Blocks per thread */ + tblocks = nblocks / context->parent_context->numthreads; + leftover2 = nblocks % context->parent_context->numthreads; + tblocks = (leftover2>0)? 
tblocks+1: tblocks; + + nblock_ = context->tid*tblocks; + tblock = nblock_ + tblocks; + if (tblock > nblocks) { + tblock = nblocks; + } + } + + /* Loop over blocks */ + leftoverblock = 0; + while ((nblock_ < tblock) && context->parent_context->thread_giveup_code > 0) { + bsize = blocksize; + if (nblock_ == (nblocks - 1) && (leftover > 0)) { + bsize = leftover; + leftoverblock = 1; + } + if (compress) { + if (flags & BLOSC_MEMCPYED) { + /* We want to memcpy only */ + fastcopy(dest + BLOSC_MAX_OVERHEAD + nblock_ * blocksize, src + nblock_ * blocksize, + bsize); + cbytes = bsize; + } + else { + /* Regular compression */ + cbytes = blosc_c(context->parent_context, bsize, leftoverblock, 0, ebsize, + src+nblock_*blocksize, tmp2, tmp, tmp3); + } + } + else { + if (flags & BLOSC_MEMCPYED) { + /* We want to memcpy only */ + fastcopy(dest + nblock_ * blocksize, src + BLOSC_MAX_OVERHEAD + nblock_ * blocksize, + bsize); + cbytes = bsize; + } + else { + cbytes = blosc_d(context->parent_context, bsize, leftoverblock, + src + sw32_(bstarts + nblock_ * 4), + dest+nblock_*blocksize, + tmp, tmp2); + } + } + + /* Check whether current thread has to giveup */ + if (context->parent_context->thread_giveup_code <= 0) { + break; + } + + /* Check results for the compressed/decompressed block */ + if (cbytes < 0) { /* compr/decompr failure */ + /* Set giveup_code error */ + pthread_mutex_lock(&context->parent_context->count_mutex); + context->parent_context->thread_giveup_code = cbytes; + pthread_mutex_unlock(&context->parent_context->count_mutex); + break; + } + + if (compress && !(flags & BLOSC_MEMCPYED)) { + /* Start critical section */ + pthread_mutex_lock(&context->parent_context->count_mutex); + ntdest = context->parent_context->num_output_bytes; + _sw32(bstarts + nblock_ * 4, ntdest); /* update block start counter */ + if ( (cbytes == 0) || (ntdest+cbytes > maxbytes) ) { + context->parent_context->thread_giveup_code = 0; /* uncompressible buffer */ + pthread_mutex_unlock(&context->parent_context->count_mutex); + break; + } + context->parent_context->thread_nblock++; + nblock_ = context->parent_context->thread_nblock; + context->parent_context->num_output_bytes += cbytes; /* update return bytes counter */ + pthread_mutex_unlock(&context->parent_context->count_mutex); + /* End of critical section */ + + /* Copy the compressed buffer to destination */ + fastcopy(dest + ntdest, tmp2, cbytes); + } + else { + nblock_++; + /* Update counter for this thread */ + ntbytes += cbytes; + } + + } /* closes while (nblock_) */ + + /* Sum up all the bytes decompressed */ + if ((!compress || (flags & BLOSC_MEMCPYED)) && context->parent_context->thread_giveup_code > 0) { + /* Update global counter for all threads (decompression only) */ + pthread_mutex_lock(&context->parent_context->count_mutex); + context->parent_context->num_output_bytes += ntbytes; + pthread_mutex_unlock(&context->parent_context->count_mutex); + } + + /* Meeting point for all threads (wait for finalization) */ + WAIT_FINISH(NULL, context->parent_context); + } + + /* Cleanup our working space and context */ + my_free(context->tmp); + my_free(context); + + return(NULL); +} + + +static int init_threads(struct blosc_context* context) +{ + int32_t tid; + int rc2; + int32_t ebsize; + struct thread_context* thread_context; + + /* Initialize mutex and condition variable objects */ + pthread_mutex_init(&context->count_mutex, NULL); + + /* Set context thread sentinels */ + context->thread_giveup_code = 1; + context->thread_nblock = -1; + + /* Barrier initialization 
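+     (on platforms without POSIX barriers, the mutex/condition-variable
+     pair below emulates the same synchronization)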
*/ +#ifdef _POSIX_BARRIERS_MINE + pthread_barrier_init(&context->barr_init, NULL, context->numthreads+1); + pthread_barrier_init(&context->barr_finish, NULL, context->numthreads+1); +#else + pthread_mutex_init(&context->count_threads_mutex, NULL); + pthread_cond_init(&context->count_threads_cv, NULL); + context->count_threads = 0; /* Reset threads counter */ +#endif + +#if !defined(_WIN32) + /* Initialize and set thread detached attribute */ + pthread_attr_init(&context->ct_attr); + pthread_attr_setdetachstate(&context->ct_attr, PTHREAD_CREATE_JOINABLE); +#endif + + /* Finally, create the threads in detached state */ + for (tid = 0; tid < context->numthreads; tid++) { + context->tids[tid] = tid; + + /* Create a thread context thread owns context (will destroy when finished) */ + thread_context = (struct thread_context*)my_malloc(sizeof(struct thread_context)); + thread_context->parent_context = context; + thread_context->tid = tid; + + ebsize = context->blocksize + context->typesize * (int32_t)sizeof(int32_t); + thread_context->tmp = my_malloc(context->blocksize + ebsize + context->blocksize); + thread_context->tmp2 = thread_context->tmp + context->blocksize; + thread_context->tmp3 = thread_context->tmp + context->blocksize + ebsize; + thread_context->tmpblocksize = context->blocksize; + +#if !defined(_WIN32) + rc2 = pthread_create(&context->threads[tid], &context->ct_attr, t_blosc, (void *)thread_context); +#else + rc2 = pthread_create(&context->threads[tid], NULL, t_blosc, (void *)thread_context); +#endif + if (rc2) { + fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc2); + fprintf(stderr, "\tError detail: %s\n", strerror(rc2)); + return(-1); + } + } + + + return(0); +} + +int blosc_get_nthreads(void) +{ + int ret = g_threads; + + return ret; +} + +int blosc_set_nthreads(int nthreads_new) +{ + int ret = g_threads; + + /* Check if should initialize */ + if (!g_initlib) blosc_init(); + + if (nthreads_new != ret){ + /* Re-initialize Blosc */ + blosc_destroy(); + blosc_init(); + g_threads = nthreads_new; + } + + return ret; +} + +int blosc_set_nthreads_(struct blosc_context* context) +{ + if (context->numthreads > BLOSC_MAX_THREADS) { + fprintf(stderr, + "Error. nthreads cannot be larger than BLOSC_MAX_THREADS (%d)", + BLOSC_MAX_THREADS); + return -1; + } + else if (context->numthreads <= 0) { + fprintf(stderr, "Error. 
nthreads must be a positive integer"); + return -1; + } + + /* Launch a new pool of threads */ + if (context->numthreads > 1 && context->numthreads != context->threads_started) { + blosc_release_threadpool(context); + init_threads(context); + } + + /* We have now started the threads */ + context->threads_started = context->numthreads; + + return context->numthreads; +} + +const char* blosc_get_compressor(void) +{ + const char* compname; + blosc_compcode_to_compname(g_compressor, &compname); + + return compname; +} + +int blosc_set_compressor(const char *compname) +{ + int code = blosc_compname_to_compcode(compname); + + g_compressor = code; + + /* Check if should initialize */ + if (!g_initlib) blosc_init(); + + return code; +} + +const char* blosc_list_compressors(void) +{ + static int compressors_list_done = 0; + static char ret[256]; + + if (compressors_list_done) return ret; + ret[0] = '\0'; + strcat(ret, BLOSC_BLOSCLZ_COMPNAME); +#if defined(HAVE_LZ4) + strcat(ret, ","); strcat(ret, BLOSC_LZ4_COMPNAME); + strcat(ret, ","); strcat(ret, BLOSC_LZ4HC_COMPNAME); +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + strcat(ret, ","); strcat(ret, BLOSC_SNAPPY_COMPNAME); +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + strcat(ret, ","); strcat(ret, BLOSC_ZLIB_COMPNAME); +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + strcat(ret, ","); strcat(ret, BLOSC_ZSTD_COMPNAME); +#endif /* HAVE_ZSTD */ + compressors_list_done = 1; + return ret; +} + +const char* blosc_get_version_string(void) +{ + return BLOSC_VERSION_STRING; +} + +int blosc_get_complib_info(const char *compname, char **complib, char **version) +{ + int clibcode; + const char *clibname; + const char *clibversion = "unknown"; + +#if (defined(HAVE_LZ4) && defined(LZ4_VERSION_MAJOR)) || (defined(HAVE_SNAPPY) && defined(SNAPPY_VERSION)) || defined(ZSTD_VERSION_MAJOR) + char sbuffer[256]; +#endif + + clibcode = compname_to_clibcode(compname); + clibname = clibcode_to_clibname(clibcode); + + /* complib version */ + if (clibcode == BLOSC_BLOSCLZ_LIB) { + clibversion = BLOSCLZ_VERSION_STRING; + } +#if defined(HAVE_LZ4) + else if (clibcode == BLOSC_LZ4_LIB) { +#if defined(LZ4_VERSION_MAJOR) + sprintf(sbuffer, "%d.%d.%d", + LZ4_VERSION_MAJOR, LZ4_VERSION_MINOR, LZ4_VERSION_RELEASE); + clibversion = sbuffer; +#endif /* LZ4_VERSION_MAJOR */ + } +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + else if (clibcode == BLOSC_SNAPPY_LIB) { +#if defined(SNAPPY_VERSION) + sprintf(sbuffer, "%d.%d.%d", SNAPPY_MAJOR, SNAPPY_MINOR, SNAPPY_PATCHLEVEL); + clibversion = sbuffer; +#endif /* SNAPPY_VERSION */ + } +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + else if (clibcode == BLOSC_ZLIB_LIB) { + clibversion = ZLIB_VERSION; + } +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + else if (clibcode == BLOSC_ZSTD_LIB) { + sprintf(sbuffer, "%d.%d.%d", + ZSTD_VERSION_MAJOR, ZSTD_VERSION_MINOR, ZSTD_VERSION_RELEASE); + clibversion = sbuffer; + } +#endif /* HAVE_ZSTD */ + + *complib = strdup(clibname); + *version = strdup(clibversion); + return clibcode; +} + +/* Return `nbytes`, `cbytes` and `blocksize` from a compressed buffer. 
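+   These are read from the 16-byte header: bytes 4..7 hold `nbytes`,
+   bytes 8..11 `blocksize` and bytes 12..15 `cbytes`, all little-endian.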
*/ +void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes, + size_t *cbytes, size_t *blocksize) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + uint8_t version = _src[0]; /* version of header */ + + if (version != BLOSC_VERSION_FORMAT) { + *nbytes = *blocksize = *cbytes = 0; + return; + } + + /* Read the interesting values */ + *nbytes = (size_t)sw32_(_src + 4); /* uncompressed buffer size */ + *blocksize = (size_t)sw32_(_src + 8); /* block size */ + *cbytes = (size_t)sw32_(_src + 12); /* compressed buffer size */ +} + + +/* Return `typesize` and `flags` from a compressed buffer. */ +void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize, + int *flags) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + + uint8_t version = _src[0]; /* version of header */ + + if (version != BLOSC_VERSION_FORMAT) { + *flags = *typesize = 0; + return; + } + + /* Read the interesting values */ + *flags = (int)_src[2] & 7; /* first three flags */ + *typesize = (size_t)_src[3]; /* typesize */ +} + + +/* Return version information from a compressed buffer. */ +void blosc_cbuffer_versions(const void *cbuffer, int *version, + int *versionlz) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + + /* Read the version info */ + *version = (int)_src[0]; /* blosc format version */ + *versionlz = (int)_src[1]; /* Lempel-Ziv compressor format version */ +} + + +/* Return the compressor library/format used in a compressed buffer. */ +const char *blosc_cbuffer_complib(const void *cbuffer) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + int clibcode; + const char *complib; + + /* Read the compressor format/library info */ + clibcode = (_src[2] & 0xe0) >> 5; + complib = clibcode_to_clibname(clibcode); + return complib; +} + +/* Get the internal blocksize to be used during compression. 0 means + that an automatic blocksize is computed internally. */ +int blosc_get_blocksize(void) +{ + return (int)g_force_blocksize; +} + +/* Force the use of a specific blocksize. If 0, an automatic + blocksize will be used (the default). */ +void blosc_set_blocksize(size_t size) +{ + g_force_blocksize = (int32_t)size; +} + +/* Force the use of a specific split mode. 
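+   Accepted values are the BLOSC_*_SPLIT constants declared in blosc.h.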
*/
+void blosc_set_splitmode(int mode)
+{
+  g_splitmode = mode;
+}
+
+void blosc_init(void)
+{
+  /* Return if we are already initialized */
+  if (g_initlib) return;
+
+  pthread_mutex_init(&global_comp_mutex, NULL);
+  g_global_context = (struct blosc_context*)my_malloc(sizeof(struct blosc_context));
+  g_global_context->threads_started = 0;
+  g_initlib = 1;
+}
+
+void blosc_destroy(void)
+{
+  /* Return if Blosc is not initialized */
+  if (!g_initlib) return;
+
+  g_initlib = 0;
+  blosc_release_threadpool(g_global_context);
+  my_free(g_global_context);
+  pthread_mutex_destroy(&global_comp_mutex);
+}
+
+int blosc_release_threadpool(struct blosc_context* context)
+{
+  int32_t t;
+  void* status;
+  int rc;
+  int rc2;
+
+  if (context->threads_started > 0)
+  {
+    /* Tell all existing threads to finish */
+    context->end_threads = 1;
+
+    /* Sync threads */
+    WAIT_INIT(-1, context);
+
+    /* Join exiting threads */
+    for (t=0; t<context->threads_started; t++) {
+      rc2 = pthread_join(context->threads[t], &status);
+      if (rc2) {
+        fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc2);
+        fprintf(stderr, "\tError detail: %s\n", strerror(rc2));
+      }
+    }
+
+    /* Release mutex and condition variable objects */
+    pthread_mutex_destroy(&context->count_mutex);
+
+    /* Barriers */
+  #ifdef _POSIX_BARRIERS_MINE
+    pthread_barrier_destroy(&context->barr_init);
+    pthread_barrier_destroy(&context->barr_finish);
+  #else
+    pthread_mutex_destroy(&context->count_threads_mutex);
+    pthread_cond_destroy(&context->count_threads_cv);
+  #endif
+
+    /* Thread attributes */
+  #if !defined(_WIN32)
+    pthread_attr_destroy(&context->ct_attr);
+  #endif
+
+  }
+
+  context->threads_started = 0;
+
+  return 0;
+}
+
+int blosc_free_resources(void)
+{
+  /* Return if Blosc is not initialized */
+  if (!g_initlib) return -1;
+
+  return blosc_release_threadpool(g_global_context);
+}
diff --git a/c-blosc/blosc/blosc.h b/c-blosc/blosc/blosc.h
new file mode 100644
index 0000000..5e52427
--- /dev/null
+++ b/c-blosc/blosc/blosc.h
@@ -0,0 +1,512 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+#ifndef BLOSC_H
+#define BLOSC_H
+
+#include <limits.h>
+#include <stdlib.h>
+#include "blosc-export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Version numbers */
+#define BLOSC_VERSION_MAJOR    1    /* for major interface/format changes */
+#define BLOSC_VERSION_MINOR    14   /* for minor interface/format changes */
+#define BLOSC_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+
+#define BLOSC_VERSION_STRING   "1.14.3"  /* string version.  Sync with above! */
+#define BLOSC_VERSION_REVISION "$Rev$"   /* revision version */
+#define BLOSC_VERSION_DATE     "$Date:: 2018-04-06 #$"    /* date version */
+
+#define BLOSCLZ_VERSION_STRING "1.1.0"   /* the internal compressor version */
+
+/* The *_FORMAT symbols should be just 1-byte long */
+#define BLOSC_VERSION_FORMAT    2   /* Blosc format version, starting at 1 */
+
+/* Minimum header length */
+#define BLOSC_MIN_HEADER_LENGTH 16
+
+/* The maximum overhead during compression in bytes.
This equals to + BLOSC_MIN_HEADER_LENGTH now, but can be higher in future + implementations */ +#define BLOSC_MAX_OVERHEAD BLOSC_MIN_HEADER_LENGTH + +/* Maximum source buffer size to be compressed */ +#define BLOSC_MAX_BUFFERSIZE (INT_MAX - BLOSC_MAX_OVERHEAD) + +/* Maximum typesize before considering source buffer as a stream of bytes */ +#define BLOSC_MAX_TYPESIZE 255 /* Cannot be larger than 255 */ + +/* The maximum number of threads (for some static arrays) */ +#define BLOSC_MAX_THREADS 256 + +/* Codes for shuffling (see blosc_compress) */ +#define BLOSC_NOSHUFFLE 0 /* no shuffle */ +#define BLOSC_SHUFFLE 1 /* byte-wise shuffle */ +#define BLOSC_BITSHUFFLE 2 /* bit-wise shuffle */ + +/* Codes for internal flags (see blosc_cbuffer_metainfo) */ +#define BLOSC_DOSHUFFLE 0x1 /* byte-wise shuffle */ +#define BLOSC_MEMCPYED 0x2 /* plain copy */ +#define BLOSC_DOBITSHUFFLE 0x4 /* bit-wise shuffle */ + +/* Codes for the different compressors shipped with Blosc */ +#define BLOSC_BLOSCLZ 0 +#define BLOSC_LZ4 1 +#define BLOSC_LZ4HC 2 +#define BLOSC_SNAPPY 3 +#define BLOSC_ZLIB 4 +#define BLOSC_ZSTD 5 + +/* Names for the different compressors shipped with Blosc */ +#define BLOSC_BLOSCLZ_COMPNAME "blosclz" +#define BLOSC_LZ4_COMPNAME "lz4" +#define BLOSC_LZ4HC_COMPNAME "lz4hc" +#define BLOSC_SNAPPY_COMPNAME "snappy" +#define BLOSC_ZLIB_COMPNAME "zlib" +#define BLOSC_ZSTD_COMPNAME "zstd" + +/* Codes for compression libraries shipped with Blosc (code must be < 8) */ +#define BLOSC_BLOSCLZ_LIB 0 +#define BLOSC_LZ4_LIB 1 +#define BLOSC_SNAPPY_LIB 2 +#define BLOSC_ZLIB_LIB 3 +#define BLOSC_ZSTD_LIB 4 + +/* Names for the different compression libraries shipped with Blosc */ +#define BLOSC_BLOSCLZ_LIBNAME "BloscLZ" +#define BLOSC_LZ4_LIBNAME "LZ4" +#define BLOSC_SNAPPY_LIBNAME "Snappy" +#define BLOSC_ZLIB_LIBNAME "Zlib" +#define BLOSC_ZSTD_LIBNAME "Zstd" + +/* The codes for compressor formats shipped with Blosc */ +#define BLOSC_BLOSCLZ_FORMAT BLOSC_BLOSCLZ_LIB +#define BLOSC_LZ4_FORMAT BLOSC_LZ4_LIB +#define BLOSC_LZ4HC_FORMAT BLOSC_LZ4_LIB /* LZ4HC and LZ4 share the same format */ +#define BLOSC_SNAPPY_FORMAT BLOSC_SNAPPY_LIB +#define BLOSC_ZLIB_FORMAT BLOSC_ZLIB_LIB +#define BLOSC_ZSTD_FORMAT BLOSC_ZSTD_LIB + + +/* The version formats for compressors shipped with Blosc */ +/* All versions here starts at 1 */ +#define BLOSC_BLOSCLZ_VERSION_FORMAT 1 +#define BLOSC_LZ4_VERSION_FORMAT 1 +#define BLOSC_LZ4HC_VERSION_FORMAT 1 /* LZ4HC and LZ4 share the same format */ +#define BLOSC_SNAPPY_VERSION_FORMAT 1 +#define BLOSC_ZLIB_VERSION_FORMAT 1 +#define BLOSC_ZSTD_VERSION_FORMAT 1 + +/* Split mode for blocks. NEVER and ALWAYS are for experimenting with best compression ratio, + * AUTO for optimal behaviour (based on experiments), and FORWARD_COMPAT provides + * best forward compatibility */ +#define BLOSC_ALWAYS_SPLIT 1 +#define BLOSC_NEVER_SPLIT 2 +#define BLOSC_AUTO_SPLIT 3 +#define BLOSC_FORWARD_COMPAT_SPLIT 4 + +/** + Initialize the Blosc library environment. + + You must call this previous to any other Blosc call, unless you want + Blosc to be used simultaneously in a multi-threaded environment, in + which case you should *exclusively* use the + blosc_compress_ctx()/blosc_decompress_ctx() pair (see below). + */ +BLOSC_EXPORT void blosc_init(void); + + +/** + Destroy the Blosc library environment. + + You must call this after to you are done with all the Blosc calls, + unless you have not used blosc_init() before (see blosc_init() + above). 
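+
+  An illustrative lifecycle sketch (buffer handling omitted):
+
+    blosc_init();
+    ... calls to blosc_compress() / blosc_decompress() ...
+    blosc_destroy();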
+ */ +BLOSC_EXPORT void blosc_destroy(void); + + +/** + Compress a block of data in the `src` buffer and returns the size of + the compressed block. The size of `src` buffer is specified by + `nbytes`. There is not a minimum for `src` buffer size (`nbytes`). + + `clevel` is the desired compression level and must be a number + between 0 (no compression) and 9 (maximum compression). + + `doshuffle` specifies whether the shuffle compression filters + should be applied or not. BLOSC_NOSHUFFLE means not applying it, + BLOSC_SHUFFLE means applying it at a byte level and BLOSC_BITSHUFFLE + at a bit level (slower but may achieve better entropy alignment). + + `typesize` is the number of bytes for the atomic type in binary + `src` buffer. This is mainly useful for the shuffle filters. + For implementation reasons, only a 1 < `typesize` < 256 will allow the + shuffle filter to work. When `typesize` is not in this range, shuffle + will be silently disabled. + + The `dest` buffer must have at least the size of `destsize`. Blosc + guarantees that if you set `destsize` to, at least, + (`nbytes` + BLOSC_MAX_OVERHEAD), the compression will always succeed. + The `src` buffer and the `dest` buffer can not overlap. + + Compression is memory safe and guaranteed not to write the `dest` + buffer beyond what is specified in `destsize`. + + If `src` buffer cannot be compressed into `destsize`, the return + value is zero and you should discard the contents of the `dest` + buffer. + + A negative return value means that an internal error happened. This + should never happen. If you see this, please report it back + together with the buffer data causing this and compression settings. + + Environment variables + --------------------- + + blosc_compress() honors different environment variables to control + internal parameters without the need of doing that programatically. + Here are the ones supported: + + BLOSC_CLEVEL=(INTEGER): This will overwrite the `clevel` parameter + before the compression process starts. + + BLOSC_SHUFFLE=[NOSHUFFLE | SHUFFLE | BITSHUFFLE]: This will + overwrite the `doshuffle` parameter before the compression process + starts. + + BLOSC_TYPESIZE=(INTEGER): This will overwrite the `typesize` + parameter before the compression process starts. + + BLOSC_COMPRESSOR=[BLOSCLZ | LZ4 | LZ4HC | SNAPPY | ZLIB]: This will + call blosc_set_compressor(BLOSC_COMPRESSOR) before the compression + process starts. + + BLOSC_NTHREADS=(INTEGER): This will call + blosc_set_nthreads(BLOSC_NTHREADS) before the compression process + starts. + + BLOSC_BLOCKSIZE=(INTEGER): This will call + blosc_set_blocksize(BLOSC_BLOCKSIZE) before the compression process + starts. *NOTE:* The blocksize is a critical parameter with + important restrictions in the allowed values, so use this with care. + + BLOSC_NOLOCK=(ANY VALUE): This will call blosc_compress_ctx() under + the hood, with the `compressor`, `blocksize` and + `numinternalthreads` parameters set to the same as the last calls to + blosc_set_compressor(), blosc_set_blocksize() and + blosc_set_nthreads(). BLOSC_CLEVEL, BLOSC_SHUFFLE, BLOSC_TYPESIZE + environment vars will also be honored. + + BLOSC_SPLITMODE=[ FORWARD_COMPAT | AUTO | ALWAYS | NEVER ]: + This will call blosc_set_splitmode() with the different supported values. + See blosc_set_splitmode() docstrings for more info on each mode. 
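+
+  For illustration, a minimal round-trip sketch (the `data`, `packed` and
+  `back` buffers are hypothetical; error handling omitted):
+
+    float data[1000];
+    char packed[sizeof(data) + BLOSC_MAX_OVERHEAD];
+    float back[1000];
+    int csize = blosc_compress(5, BLOSC_SHUFFLE, sizeof(float), sizeof(data),
+                               data, packed, sizeof(packed));
+    int dsize = blosc_decompress(packed, back, sizeof(back));
+
+  (csize > 0 is the compressed size; 0 means the result did not fit.)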
+
+ */
+BLOSC_EXPORT int blosc_compress(int clevel, int doshuffle, size_t typesize,
+                                size_t nbytes, const void *src, void *dest,
+                                size_t destsize);
+
+
+/**
+  Context interface to blosc compression. This does not require a call
+  to blosc_init() and can be called from multithreaded applications
+  without the global lock being used, thus allowing Blosc to be executed
+  simultaneously in those scenarios.
+
+  It uses the same parameters as the blosc_compress() function plus:
+
+  `compressor`: the string representing the type of compressor to use.
+
+  `blocksize`: the requested size of the compressed blocks.  If 0, an
+  automatic blocksize will be used.
+
+  `numinternalthreads`: the number of threads to use internally.
+
+  A negative return value means that an internal error happened.  This
+  should never happen.  If you see this, please report it back
+  together with the buffer data causing this and compression settings.
+*/
+BLOSC_EXPORT int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize,
+                                    size_t nbytes, const void* src, void* dest,
+                                    size_t destsize, const char* compressor,
+                                    size_t blocksize, int numinternalthreads);
+
+/**
+  Decompress a block of compressed data in `src`, put the result in
+  `dest` and returns the size of the decompressed block.
+
+  The `src` buffer and the `dest` buffer can not overlap.
+
+  Decompression is memory safe and guaranteed not to write the `dest`
+  buffer beyond what is specified in `destsize`.
+
+  If an error occurs, e.g. the compressed data is corrupted or the
+  output buffer is not large enough, then 0 (zero) or a negative value
+  will be returned instead.
+
+  Environment variables
+  ---------------------
+
+  blosc_decompress() honors different environment variables to control
+  internal parameters without the need of doing that programmatically.
+  Here are the ones supported:
+
+  BLOSC_NTHREADS=(INTEGER): This will call
+  blosc_set_nthreads(BLOSC_NTHREADS) before the proper decompression
+  process starts.
+
+  BLOSC_NOLOCK=(ANY VALUE): This will call blosc_decompress_ctx()
+  under the hood, with the `numinternalthreads` parameter set to the
+  same value as the last call to blosc_set_nthreads().
+*/
+BLOSC_EXPORT int blosc_decompress(const void *src, void *dest, size_t destsize);
+
+
+/**
+  Context interface to blosc decompression. This does not require a
+  call to blosc_init() and can be called from multithreaded
+  applications without the global lock being used, thus allowing Blosc
+  to be executed simultaneously in those scenarios.
+
+  It uses the same parameters as the blosc_decompress() function plus:
+
+  `numinternalthreads`: number of threads to use internally.
+
+  Decompression is memory safe and guaranteed not to write the `dest`
+  buffer beyond what is specified in `destsize`.
+
+  If an error occurs, e.g. the compressed data is corrupted or the
+  output buffer is not large enough, then 0 (zero) or a negative value
+  will be returned instead.
+*/
+BLOSC_EXPORT int blosc_decompress_ctx(const void *src, void *dest,
+                                      size_t destsize, int numinternalthreads);
+
+/**
+  Get `nitems` (of typesize size) in `src` buffer starting at `start`.
+  The items are returned in `dest` buffer, which has to have enough
+  space for storing all items.
+
+  Returns the number of bytes copied to `dest` or a negative value if
+  some error happens.
+  */
+BLOSC_EXPORT int blosc_getitem(const void *src, int start, int nitems, void *dest);
+
+
+/**
+  Returns the current number of threads that are used for
+  compression/decompression.
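+  If blosc_set_nthreads() has not been called, this is 1.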
+ */
+BLOSC_EXPORT int blosc_get_nthreads(void);
+
+
+/**
+  Initialize a pool of threads for compression/decompression.  If
+  `nthreads` is 1, then the serial version is chosen and a possible
+  previous existing pool is ended.  If this is not called, `nthreads`
+  is set to 1 internally.
+
+  Returns the previous number of threads.
+  */
+BLOSC_EXPORT int blosc_set_nthreads(int nthreads);
+
+
+/**
+  Returns the current compressor that is being used for compression.
+  */
+BLOSC_EXPORT const char* blosc_get_compressor(void);
+
+
+/**
+  Select the compressor to be used.  The supported ones are "blosclz",
+  "lz4", "lz4hc", "snappy", "zlib" and "zstd".  If this function is not
+  called, then "blosclz" will be used by default.
+
+  In case the compressor is not recognized, or there is no support
+  for it in this build, it returns -1.  Else it returns the code for
+  the compressor (>=0).
+  */
+BLOSC_EXPORT int blosc_set_compressor(const char* compname);
+
+
+/**
+  Get the `compname` associated with the `compcode`.
+
+  If the compressor code is not recognized, or there is no support
+  for it in this build, -1 is returned.  Else, the compressor code is
+  returned.
+  */
+BLOSC_EXPORT int blosc_compcode_to_compname(int compcode, const char **compname);
+
+
+/**
+  Return the compressor code associated with the compressor name.
+
+  If the compressor name is not recognized, or there is no support
+  for it in this build, -1 is returned instead.
+  */
+BLOSC_EXPORT int blosc_compname_to_compcode(const char *compname);
+
+
+/**
+  Get a list of compressors supported in the current build.  The
+  returned value is a string with a concatenation of "blosclz", "lz4",
+  "lz4hc", "snappy", "zlib" or "zstd", separated by commas, depending
+  on which ones are present in the build.
+
+  This function does not leak, so you should not free() the returned
+  list.
+
+  This function should always succeed.
+  */
+BLOSC_EXPORT const char* blosc_list_compressors(void);
+
+/**
+  Return the version of the C-Blosc library in string format.
+
+  Useful for dynamic libraries.
+*/
+BLOSC_EXPORT const char* blosc_get_version_string(void);
+
+
+/**
+  Get info from compression libraries included in the current build.
+  In `compname` you pass the compressor name that you want info from.
+
+  In `complib` and `version` you get a pointer to the compressor
+  library name and the version in string format respectively.  After
+  using the name and version, you should free() them so as to avoid
+  leaks.
+
+  If the compressor is supported, it returns the code for the library
+  (>=0).  If it is not supported, this function returns -1.
+  */
+BLOSC_EXPORT int blosc_get_complib_info(const char *compname, char **complib, char **version);
+
+
+/**
+  Free possible memory temporaries and thread resources.  Use this
+  when you are not going to use Blosc for a long while.  In case of
+  problems releasing the resources, it returns a negative number, else
+  it returns 0.
+  */
+BLOSC_EXPORT int blosc_free_resources(void);
+
+
+/**
+  Return information about a compressed buffer, namely the number of
+  uncompressed bytes (`nbytes`) and compressed (`cbytes`).  It also
+  returns the `blocksize` (which is used internally for doing the
+  compression by blocks).
+
+  You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a
+  compressed buffer for this call to work.
+
+  If the format is not supported by the library, all output arguments will be
+  filled with zeros.
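+
+  Illustrative use on a `packed` buffer produced by blosc_compress()
+  (the buffer name is hypothetical):
+
+    size_t nbytes, cbytes, blocksize;
+    blosc_cbuffer_sizes(packed, &nbytes, &cbytes, &blocksize);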
+ */ +BLOSC_EXPORT void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes, + size_t *cbytes, size_t *blocksize); + + +/** + Return meta-information about a compressed buffer, namely the type size + (`typesize`), as well as some internal `flags`. + + The `flags` is a set of bits, where the used ones are: + * bit 0: whether the shuffle filter has been applied or not + * bit 1: whether the internal buffer is a pure memcpy or not + * bit 2: whether the bit shuffle filter has been applied or not + + You can use the `BLOSC_DOSHUFFLE`, `BLOSC_DOBITSHUFFLE` and + `BLOSC_MEMCPYED` symbols for extracting the interesting bits + (e.g. ``flags & BLOSC_DOSHUFFLE`` says whether the buffer is + byte-shuffled or not). + + You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a + compressed buffer for this call to work. + + If the format is not supported by the library, all output arguments will be + filled with zeros. + */ +BLOSC_EXPORT void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize, + int *flags); + + +/** + Return information about a compressed buffer, namely the internal + Blosc format version (`version`) and the format for the internal + compressor used (`compversion`). + + This function should always succeed. + */ +BLOSC_EXPORT void blosc_cbuffer_versions(const void *cbuffer, int *version, + int *compversion); + + +/** + Return the compressor library/format used in a compressed buffer. + + This function should always succeed. + */ +BLOSC_EXPORT const char *blosc_cbuffer_complib(const void *cbuffer); + + + +/********************************************************************* + + Low-level functions follows. Use them only if you are an expert! + +*********************************************************************/ + +/** + Get the internal blocksize to be used during compression. 0 means + that an automatic blocksize is computed internally (the default). + */ +BLOSC_EXPORT int blosc_get_blocksize(void); + +/** + Force the use of a specific blocksize. If 0, an automatic + blocksize will be used (the default). + + The blocksize is a critical parameter with important restrictions in + the allowed values, so use this with care. + */ +BLOSC_EXPORT void blosc_set_blocksize(size_t blocksize); + +/** + Set the split mode. + + This function can take the next values: + * BLOSC_FORWARD_COMPAT_SPLIT + * BLOSC_AUTO_SPLIT + * BLOSC_NEVER_SPLIT + * BLOSC_ALWAYS_SPLIT + + BLOSC_FORWARD_COMPAT offers reasonably forward compatibility, + BLOSC_AUTO_SPLIT is for nearly optimal results (based on heuristics), + BLOSC_NEVER_SPLIT and BLOSC_ALWAYS_SPLIT are for the user experimenting + when trying to get best compression ratios and/or speed. + + If not called, the default mode is BLOSC_FORWARD_COMPAT_SPLIT. + + This function should always succeed. + */ +BLOSC_EXPORT void blosc_set_splitmode(int splitmode); + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/c-blosc/blosc/blosclz.c b/c-blosc/blosc/blosclz.c new file mode 100644 index 0000000..564d8a3 --- /dev/null +++ b/c-blosc/blosc/blosclz.c @@ -0,0 +1,528 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + Creation date: 2009-05-20 + + See LICENSES/BLOSC.txt for details about copyright and rights to use. 
+**********************************************************************/
+
+/*********************************************************************
+  The code in this file is heavily based on FastLZ, a lightning-fast
+  lossless compression library.  See LICENSES/FASTLZ.txt for details.
+**********************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "blosclz.h"
+#include "fastcopy.h"
+#include "blosc-common.h"
+
+
+/*
+ * Check for bound when decompressing.
+ * It is a good idea to define this while developing.
+ */
+#undef BLOSCLZ_SAFE
+
+/*
+ * Give hints to the compiler for branch prediction optimization.
+ */
+#if defined(__GNUC__) && (__GNUC__ > 2)
+#define BLOSCLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1))
+#define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0))
+#else
+#define BLOSCLZ_EXPECT_CONDITIONAL(c) (c)
+#define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (c)
+#endif
+
+/*
+ * Use inlined functions for supported systems.
+ */
+#if defined(_MSC_VER) && !defined(__cplusplus)   /* Visual Studio */
+#define inline __inline  /* Visual C is not C99, but supports some kind of inline */
+#endif
+
+#define MAX_COPY 32
+#define MAX_DISTANCE 8191
+#define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1)
+
+#ifdef BLOSC_STRICT_ALIGN
+  #define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8)
+#else
+  #define BLOSCLZ_READU16(p) *((const uint16_t*)(p))
+#endif
+
+
+/* Simple, but pretty effective hash function for 3-byte sequence */
+#define HASH_FUNCTION(v, p, l) {                         \
+  v = BLOSCLZ_READU16(p);                                \
+  v ^= BLOSCLZ_READU16(p + 1) ^ ( v >> (16 - l));        \
+  v &= (1 << l) - 1;                                     \
+}
+
+#define LITERAL(ip, op, op_limit, anchor, copy) {        \
+  if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit))   \
+    goto out;                                            \
+  *op++ = *anchor++;                                     \
+  ip = anchor;                                           \
+  copy++;                                                \
+  if(BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {   \
+    copy = 0;                                            \
+    *op++ = MAX_COPY-1;                                  \
+  }                                                      \
+  continue;                                              \
+}
+
+#define IP_BOUNDARY 2
+
+
+
+static inline uint8_t *get_run(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
+  uint8_t x = ip[-1];
+  int64_t value, value2;
+  /* Broadcast the value for every byte in a 64-bit register */
+  memset(&value, x, 8);
+  /* safe because the outer check against ip limit */
+  while (ip < (ip_bound - sizeof(int64_t))) {
+#if defined(BLOSC_STRICT_ALIGN)
+    memcpy(&value2, ref, 8);
+#else
+    value2 = ((int64_t*)ref)[0];
+#endif
+    if (value != value2) {
+      /* Find the byte that starts to differ */
+      while (*ref++ == x) ip++;
+      return ip;
+    }
+    else {
+      ip += 8;
+      ref += 8;
+    }
+  }
+  /* Look into the remainder */
+  while ((ip < ip_bound) && (*ref++ == x)) ip++;
+  return ip;
+}
+
+#ifdef __SSE2__
+static inline uint8_t *get_run_16(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
+  uint8_t x = ip[-1];
+  __m128i value, value2, cmp;
+
+  /* Broadcast the value for every byte in a 128-bit register */
+  memset(&value, x, sizeof(__m128i));
+  /* safe because the outer check against ip limit */
+  while (ip < (ip_bound - sizeof(__m128i))) {
+    value2 = _mm_loadu_si128((__m128i *)ref);
+    cmp = _mm_cmpeq_epi32(value, value2);
+    if (_mm_movemask_epi8(cmp) != 0xFFFF) {
+      /* Find the byte that starts to differ */
+      while (*ref++ == x) ip++;
+      return ip;
+    }
+    else {
+      ip += sizeof(__m128i);
+      ref += sizeof(__m128i);
+    }
+  }
+  /* Look into the remainder */
+  while ((ip < ip_bound) && (*ref++ == x)) ip++;
+  return ip;
+}
+#endif
+
+
+#ifdef __AVX2__
+static inline uint8_t *get_run_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) {
+  uint8_t x = ip[-1];
+  __m256i value,
value2, cmp; + + /* Broadcast the value for every byte in a 256-bit register */ + memset(&value, x, sizeof(__m256i)); + /* safe because the outer check against ip limit */ + while (ip < (ip_bound - (sizeof(__m256i)))) { + value2 = _mm256_loadu_si256((__m256i *)ref); + cmp = _mm256_cmpeq_epi64(value, value2); + if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) { + /* Find the byte that starts to differ */ + while (*ref++ == x) ip++; + return ip; + } + else { + ip += sizeof(__m256i); + ref += sizeof(__m256i); + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == x)) ip++; + return ip; +} +#endif + + +/* Find the byte that starts to differ */ +uint8_t *get_match(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { +#if !defined(BLOSC_STRICT_ALIGN) + while (ip < (ip_bound - sizeof(int64_t))) { + if (((int64_t*)ref)[0] != ((int64_t*)ip)[0]) { + /* Find the byte that starts to differ */ + while (*ref++ == *ip++) {} + return ip; + } + else { + ip += sizeof(int64_t); + ref += sizeof(int64_t); + } + } +#endif + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == *ip++)) {} + return ip; +} + + +#if defined(__SSE2__) +uint8_t *get_match_16(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { + __m128i value, value2, cmp; + + while (ip < (ip_bound - sizeof(__m128i))) { + value = _mm_loadu_si128((__m128i *) ip); + value2 = _mm_loadu_si128((__m128i *) ref); + cmp = _mm_cmpeq_epi32(value, value2); + if (_mm_movemask_epi8(cmp) != 0xFFFF) { + /* Find the byte that starts to differ */ + while (*ref++ == *ip++) {} + return ip; + } + else { + ip += sizeof(__m128i); + ref += sizeof(__m128i); + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == *ip++)) {} + return ip; +} +#endif + + +#if defined(__AVX2__) +uint8_t *get_match_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { + __m256i value, value2, cmp; + + while (ip < (ip_bound - sizeof(__m256i))) { + value = _mm256_loadu_si256((__m256i *) ip); + value2 = _mm256_loadu_si256((__m256i *)ref); + cmp = _mm256_cmpeq_epi64(value, value2); + if (_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) { + /* Find the byte that starts to differ */ + while (*ref++ == *ip++) {} + return ip; + } + else { + ip += sizeof(__m256i); + ref += sizeof(__m256i); + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == *ip++)) {} + return ip; +} +#endif + + +int blosclz_compress(const int opt_level, const void* input, int length, + void* output, int maxout) { + uint8_t* ip = (uint8_t*)input; + uint8_t* ibase = (uint8_t*)input; + uint8_t* ip_bound = ip + length - IP_BOUNDARY; + uint8_t* ip_limit = ip + length - 12; + uint8_t* op = (uint8_t*)output; + + /* Hash table depends on the opt level. Hash_log cannot be larger than 15. */ + /* The parametrization below is made from playing with the bench suite, like: + $ bench/bench blosclz single 4 + $ bench/bench blosclz single 4 4194280 12 25 + and taking the minimum times on a i5-3380M @ 2.90GHz. + Curiously enough, values >= 14 does not always + get maximum compression, even with large blocksizes. 
*/ + int8_t hash_log_[10] = {-1, 15, 15, 15, 15, 15, 15, 15, 15, 15}; + uint8_t hash_log = hash_log_[opt_level]; + uint16_t hash_size = 1 << hash_log; + uint16_t* htab; + uint8_t* op_limit; + + int32_t hval; + uint8_t copy; + + double maxlength_[10] = {-1, .1, .3, .5, .6, .8, .9, .95, 1.0, 1.0}; + int32_t maxlength = (int32_t)(length * maxlength_[opt_level]); + if (maxlength > (int32_t)maxout) { + maxlength = (int32_t)maxout; + } + op_limit = op + maxlength; + + /* output buffer cannot be less than 66 bytes or we can get into trouble */ + if (BLOSCLZ_UNEXPECT_CONDITIONAL(maxout < 66 || length < 4)) { + return 0; + } + + htab = (uint16_t*)calloc(hash_size, sizeof(uint16_t)); + + /* we start with literal copy */ + copy = 2; + *op++ = MAX_COPY - 1; + *op++ = *ip++; + *op++ = *ip++; + + /* main loop */ + while (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) { + const uint8_t* ref; + int32_t distance; + int32_t len = 3; /* minimum match length */ + uint8_t* anchor = ip; /* comparison starting-point */ + + /* check for a run */ + if (ip[0] == ip[-1] && BLOSCLZ_READU16(ip - 1) == BLOSCLZ_READU16(ip + 1)) { + distance = 1; + ip += 3; + ref = anchor - 1 + 3; + goto match; + } + + /* find potential match */ + HASH_FUNCTION(hval, ip, hash_log); + ref = ibase + htab[hval]; + + /* calculate distance to the match */ + distance = (int32_t)(anchor - ref); + + /* update hash table if necessary */ + /* not exactly sure why 0x1F works best, but experiments apparently say so */ + if ((distance & 0x1F) == 0) + htab[hval] = (uint16_t)(anchor - ibase); + + /* is this a match? check the first 3 bytes */ + if (distance == 0 || (distance >= MAX_FARDISTANCE) || + *ref++ != *ip++ || *ref++ != *ip++ || *ref++ != *ip++) { + LITERAL(ip, op, op_limit, anchor, copy); + } + + /* far, needs at least 5-byte match */ + if (opt_level >= 5 && distance >= MAX_DISTANCE) { + if (*ip++ != *ref++ || *ip++ != *ref++) LITERAL(ip, op, op_limit, anchor, copy); + len += 2; + } + + match: + + /* last matched byte */ + ip = anchor + len; + + /* distance is biased */ + distance--; + + if (!distance) { + /* zero distance means a run */ +#if defined(__AVX2__) + ip = get_run_32(ip, ip_bound, ref); +#elif defined(__SSE2__) + ip = get_run_16(ip, ip_bound, ref); +#else + ip = get_run(ip, ip_bound, ref); +#endif + } + else { +#if defined(__AVX2__) + /* Experiments show that the SSE2 version is a bit faster, even on AVX2 processors */ + ip = get_match_16(ip, ip_bound + IP_BOUNDARY, ref); +#elif defined(__SSE2__) + ip = get_match_16(ip, ip_bound + IP_BOUNDARY, ref); +#else + ip = get_match(ip, ip_bound + IP_BOUNDARY, ref); +#endif + } + + /* if we have copied something, adjust the copy count */ + if (copy) + /* copy is biased, '0' means 1 byte copy */ + *(op - copy - 1) = copy - 1; + else + /* back, to overwrite the copy count */ + op--; + + /* reset literal counter */ + copy = 0; + + /* length is biased, '1' means a match of 3 bytes */ + ip -= 3; + len = (int32_t)(ip - anchor); + + /* check that we have space enough to encode the match for all the cases */ + if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + (len / 255) + 6 > op_limit)) goto out; + + /* encode the match */ + if (distance < MAX_DISTANCE) { + if (len < 7) { + *op++ = (len << 5) + (distance >> 8); + *op++ = (distance & 255); + } + else { + *op++ = (uint8_t)((7 << 5) + (distance >> 8)); + for (len -= 7; len >= 255; len -= 255) + *op++ = 255; + *op++ = len; + *op++ = (distance & 255); + } + } + else { + /* far away, but not yet in the another galaxy... 
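+         (encoded with 31 in the distance bits of the control byte, a 255
+         marker byte, and the remaining distance in two extra bytes)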
*/ + if (len < 7) { + distance -= MAX_DISTANCE; + *op++ = (uint8_t)((len << 5) + 31); + *op++ = 255; + *op++ = (uint8_t)(distance >> 8); + *op++ = distance & 255; + } + else { + distance -= MAX_DISTANCE; + *op++ = (7 << 5) + 31; + for (len -= 7; len >= 255; len -= 255) + *op++ = 255; + *op++ = len; + *op++ = 255; + *op++ = (uint8_t)(distance >> 8); + *op++ = distance & 255; + } + } + + /* update the hash at match boundary */ + HASH_FUNCTION(hval, ip, hash_log); + htab[hval] = (uint16_t)(ip++ - ibase); + HASH_FUNCTION(hval, ip, hash_log); + htab[hval] = (uint16_t)(ip++ - ibase); + + /* assuming literal copy */ + *op++ = MAX_COPY - 1; + } + + /* left-over as literal copy */ + ip_bound++; + while (ip <= ip_bound) { + if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + 2 > op_limit)) goto out; + *op++ = *ip++; + copy++; + if (copy == MAX_COPY) { + copy = 0; + *op++ = MAX_COPY - 1; + } + } + + /* if we have copied something, adjust the copy length */ + if (copy) + *(op - copy - 1) = copy - 1; + else + op--; + + /* marker for blosclz */ + *(uint8_t*)output |= (1 << 5); + + free(htab); + return (int)(op - (uint8_t*)output); + + out: + free(htab); + return 0; + +} + +int blosclz_decompress(const void* input, int length, void* output, int maxout) { + const uint8_t* ip = (const uint8_t*)input; + const uint8_t* ip_limit = ip + length; + uint8_t* op = (uint8_t*)output; + int32_t ctrl = (*ip++) & 31; + int32_t loop = 1; +#ifdef BLOSCLZ_SAFE + uint8_t* op_limit = op + maxout; +#endif + + do { + uint8_t* ref = op; + int32_t len = ctrl >> 5; + int32_t ofs = (ctrl & 31) << 8; + + if (ctrl >= 32) { + uint8_t code; + len--; + ref -= ofs; + if (len == 7 - 1) + do { + code = *ip++; + len += code; + } while (code == 255); + code = *ip++; + ref -= code; + + /* match from 16-bit distance */ + if (BLOSCLZ_UNEXPECT_CONDITIONAL(code == 255)) if (BLOSCLZ_EXPECT_CONDITIONAL(ofs == (31 << 8))) { + ofs = (*ip++) << 8; + ofs += *ip++; + ref = op - ofs - MAX_DISTANCE; + } + +#ifdef BLOSCLZ_SAFE + if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) { + return 0; + } + + if (BLOSCLZ_UNEXPECT_CONDITIONAL(ref - 1 < (uint8_t*)output)) { + return 0; + } +#endif + + if (BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) + ctrl = *ip++; + else + loop = 0; + + if (ref == op) { + /* optimized copy for a run */ + uint8_t b = ref[-1]; + memset(op, b, len + 3); + op += len + 3; + } + else { + /* copy from reference */ + ref--; + len += 3; + op = safecopy(op, ref, (unsigned) len); + } + } + else { + ctrl++; +#ifdef BLOSCLZ_SAFE + if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) { + return 0; + } + if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) { + return 0; + } +#endif + + // memcpy(op, ip, ctrl); op += ctrl; ip += ctrl; + // On GCC-6, fastcopy this is still faster than plain memcpy + // However, using recent CLANG/LLVM 9.0, there is almost no difference + // in performance. + op = fastcopy(op, ip, (unsigned) ctrl); + ip += ctrl; + + loop = (int32_t)BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit); + if (loop) + ctrl = *ip++; + } + } while (BLOSCLZ_EXPECT_CONDITIONAL(loop)); + + return (int)(op - (uint8_t*)output); +} diff --git a/c-blosc/blosc/blosclz.h b/c-blosc/blosc/blosclz.h new file mode 100644 index 0000000..63494dc --- /dev/null +++ b/c-blosc/blosc/blosclz.h @@ -0,0 +1,64 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. 
+**********************************************************************/ + +/********************************************************************* + The code in this file is heavily based on FastLZ, a lightning-fast + lossless compression library. See LICENSES/FASTLZ.txt for details + about copyright and rights to use. +**********************************************************************/ + + +#ifndef BLOSCLZ_H +#define BLOSCLZ_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/** + Compress a block of data in the input buffer and returns the size of + compressed block. The size of input buffer is specified by + length. The minimum input buffer size is 16. + + The output buffer must be at least 5% larger than the input buffer + and can not be smaller than 66 bytes. + + If the input is not compressible, or output does not fit in maxout + bytes, the return value will be 0 and you will have to discard the + output buffer. + + The acceleration parameter is related with the frequency for + updating the internal hash. An acceleration of 1 means that the + internal hash is updated at full rate. A value < 1 is not allowed + and will be silently set to 1. + + The input buffer and the output buffer can not overlap. +*/ + +int blosclz_compress(const int opt_level, const void* input, int length, + void* output, int maxout); + +/** + Decompress a block of compressed data and returns the size of the + decompressed block. If error occurs, e.g. the compressed data is + corrupted or the output buffer is not large enough, then 0 (zero) + will be returned instead. + + The input buffer and the output buffer can not overlap. + + Decompression is memory safe and guaranteed not to write the output buffer + more than what is specified in maxout. + */ + +int blosclz_decompress(const void* input, int length, void* output, int maxout); + +#if defined (__cplusplus) +} +#endif + +#endif /* BLOSCLZ_H */ diff --git a/c-blosc/blosc/config.h.in b/c-blosc/blosc/config.h.in new file mode 100644 index 0000000..8b518c2 --- /dev/null +++ b/c-blosc/blosc/config.h.in @@ -0,0 +1,11 @@ +#ifndef _CONFIGURATION_HEADER_GUARD_H_ +#define _CONFIGURATION_HEADER_GUARD_H_ + +#cmakedefine HAVE_LZ4 @HAVE_LZ4@ +#cmakedefine HAVE_SNAPPY @HAVE_SNAPPY@ +#cmakedefine HAVE_ZLIB @HAVE_ZLIB@ +#cmakedefine HAVE_ZSTD @HAVE_ZSTD@ +#cmakedefine BLOSC_DLL_EXPORT @DLL_EXPORT@ + + +#endif diff --git a/c-blosc/blosc/fastcopy.c b/c-blosc/blosc/fastcopy.c new file mode 100644 index 0000000..dbfc5ec --- /dev/null +++ b/c-blosc/blosc/fastcopy.c @@ -0,0 +1,504 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + Creation date: 2018-01-03 + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +/********************************************************************* + The code in this file is heavily based on memcopy.h, from the + zlib-ng compression library. See LICENSES/ZLIB.txt for details. 
+ See also: https://github.com/Dead2/zlib-ng/blob/develop/zlib.h + + New implementations by Francesc Alted: + * get_run() and get_match() family of functions + * fastcopy() and safecopy() functions + * Support for SSE2/AVX2 copy instructions for these routines +**********************************************************************/ + +#include <assert.h> +#include "blosc-common.h" + + +static inline unsigned char *copy_1_bytes(unsigned char *out, const unsigned char *from) { + *out++ = *from; + return out; +} + +static inline unsigned char *copy_2_bytes(unsigned char *out, const unsigned char *from) { +#if defined(BLOSC_STRICT_ALIGN) + uint16_t chunk; + memcpy(&chunk, from, 2); + memcpy(out, &chunk, 2); +#else + *(uint16_t *) out = *(uint16_t *) from; +#endif + return out + 2; +} + +static inline unsigned char *copy_3_bytes(unsigned char *out, const unsigned char *from) { + out = copy_1_bytes(out, from); + return copy_2_bytes(out, from + 1); +} + +static inline unsigned char *copy_4_bytes(unsigned char *out, const unsigned char *from) { +#if defined(BLOSC_STRICT_ALIGN) + uint32_t chunk; + memcpy(&chunk, from, 4); + memcpy(out, &chunk, 4); +#else + *(uint32_t *) out = *(uint32_t *) from; +#endif + return out + 4; +} + +static inline unsigned char *copy_5_bytes(unsigned char *out, const unsigned char *from) { + out = copy_1_bytes(out, from); + return copy_4_bytes(out, from + 1); +} + +static inline unsigned char *copy_6_bytes(unsigned char *out, const unsigned char *from) { + out = copy_2_bytes(out, from); + return copy_4_bytes(out, from + 2); +} + +static inline unsigned char *copy_7_bytes(unsigned char *out, const unsigned char *from) { + out = copy_3_bytes(out, from); + return copy_4_bytes(out, from + 3); +} + +static inline unsigned char *copy_8_bytes(unsigned char *out, const unsigned char *from) { +#if defined(BLOSC_STRICT_ALIGN) + uint64_t chunk; + memcpy(&chunk, from, 8); + memcpy(out, &chunk, 8); +#else + *(uint64_t *) out = *(uint64_t *) from; +#endif + return out + 8; +} + + +static inline unsigned char *copy_16_bytes(unsigned char *out, const unsigned char *from) { +#if defined(__SSE2__) + __m128i chunk; + chunk = _mm_loadu_si128((__m128i*)from); + _mm_storeu_si128((__m128i*)out, chunk); + from += 16; out += 16; +#elif !defined(BLOSC_STRICT_ALIGN) + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; +#else + int i; + for (i = 0; i < 16; i++) { + *out++ = *from++; + } +#endif + return out; +} + +static inline unsigned char *copy_32_bytes(unsigned char *out, const unsigned char *from) { +#if defined(__AVX2__) + __m256i chunk; + chunk = _mm256_loadu_si256((__m256i*)from); + _mm256_storeu_si256((__m256i*)out, chunk); + from += 32; out += 32; +#elif defined(__SSE2__) + __m128i chunk; + chunk = _mm_loadu_si128((__m128i*)from); + _mm_storeu_si128((__m128i*)out, chunk); + from += 16; out += 16; + chunk = _mm_loadu_si128((__m128i*)from); + _mm_storeu_si128((__m128i*)out, chunk); + from += 16; out += 16; +#elif !defined(BLOSC_STRICT_ALIGN) + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; +#else + int i; + for (i = 0; i < 32; i++) { + *out++ = *from++; + } +#endif + return out; +} + +#if defined(__AVX2__) +static inline unsigned char *copy_32_bytes_aligned(unsigned char *out, const unsigned char *from) { + __m256i chunk; + chunk =
_mm256_load_si256((__m256i*)from); + _mm256_storeu_si256((__m256i*)out, chunk); + return out + 32; +} +#endif // __AVX2__ + +/* Copy LEN bytes (7 or fewer) from FROM into OUT. Return OUT + LEN. */ +static inline unsigned char *copy_bytes(unsigned char *out, const unsigned char *from, unsigned len) { + assert(len < 8); + +#ifdef BLOSC_STRICT_ALIGN + while (len--) { + *out++ = *from++; + } +#else + switch (len) { + case 7: + return copy_7_bytes(out, from); + case 6: + return copy_6_bytes(out, from); + case 5: + return copy_5_bytes(out, from); + case 4: + return copy_4_bytes(out, from); + case 3: + return copy_3_bytes(out, from); + case 2: + return copy_2_bytes(out, from); + case 1: + return copy_1_bytes(out, from); + case 0: + return out; + default: + assert(0); + } +#endif /* BLOSC_STRICT_ALIGN */ + return out; +} + +/* Byte by byte semantics: copy LEN bytes from FROM and write them to OUT. Return OUT + LEN. */ +static inline unsigned char *chunk_memcpy(unsigned char *out, const unsigned char *from, unsigned len) { + unsigned sz = sizeof(uint64_t); + unsigned rem = len % sz; + unsigned by8; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. */ + copy_8_bytes(out, from); + + len /= sz; + out += rem; + from += rem; + + by8 = len % 8; + len -= by8; + switch (by8) { + case 7: + out = copy_8_bytes(out, from); + from += sz; + case 6: + out = copy_8_bytes(out, from); + from += sz; + case 5: + out = copy_8_bytes(out, from); + from += sz; + case 4: + out = copy_8_bytes(out, from); + from += sz; + case 3: + out = copy_8_bytes(out, from); + from += sz; + case 2: + out = copy_8_bytes(out, from); + from += sz; + case 1: + out = copy_8_bytes(out, from); + from += sz; + default: + break; + } + + while (len) { + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + + len -= 8; + } + + return out; +} + +/* 16-byte version of chunk_memcpy() */ +static inline unsigned char *chunk_memcpy_16(unsigned char *out, const unsigned char *from, unsigned len) { + unsigned sz = 16; + unsigned rem = len % sz; + unsigned ilen; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. */ + copy_16_bytes(out, from); + + len /= sz; + out += rem; + from += rem; + + for (ilen = 0; ilen < len; ilen++) { + copy_16_bytes(out, from); + out += sz; + from += sz; + } + + return out; +} + +/* 32-byte version of chunk_memcpy() */ +static inline unsigned char *chunk_memcpy_32(unsigned char *out, const unsigned char *from, unsigned len) { + unsigned sz = 32; + unsigned rem = len % sz; + unsigned ilen; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. 
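+
+     A worked example of this trick: for len = 73 and sz = 32, rem = 9. The
+     unconditional copy below moves bytes 0..31; out and from then advance
+     by rem = 9, and the loop copies len / sz = 2 chunks covering bytes
+     9..72. Bytes 9..31 are written twice with the same data, which is
+     harmless, and all 73 bytes are covered.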
*/ + copy_32_bytes(out, from); + + len /= sz; + out += rem; + from += rem; + + for (ilen = 0; ilen < len; ilen++) { + copy_32_bytes(out, from); + out += sz; + from += sz; + } + + return out; +} + +/* 32-byte *unrolled* version of chunk_memcpy() */ +static inline unsigned char *chunk_memcpy_32_unrolled(unsigned char *out, const unsigned char *from, unsigned len) { + unsigned sz = 32; + unsigned rem = len % sz; + unsigned by8; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. */ + copy_32_bytes(out, from); + + len /= sz; + out += rem; + from += rem; + + by8 = len % 8; + len -= by8; + switch (by8) { + case 7: + out = copy_32_bytes(out, from); + from += sz; + case 6: + out = copy_32_bytes(out, from); + from += sz; + case 5: + out = copy_32_bytes(out, from); + from += sz; + case 4: + out = copy_32_bytes(out, from); + from += sz; + case 3: + out = copy_32_bytes(out, from); + from += sz; + case 2: + out = copy_32_bytes(out, from); + from += sz; + case 1: + out = copy_32_bytes(out, from); + from += sz; + default: + break; + } + + while (len) { + out = copy_32_bytes(out, from); + from += sz; + out = copy_32_bytes(out, from); + from += sz; + out = copy_32_bytes(out, from); + from += sz; + out = copy_32_bytes(out, from); + from += sz; + out = copy_32_bytes(out, from); + from += sz; + out = copy_32_bytes(out, from); + from += sz; + out = copy_32_bytes(out, from); + from += sz; + out = copy_32_bytes(out, from); + from += sz; + + len -= 8; + } + + return out; +} + + +/* SSE2/AVX2 *unaligned* version of chunk_memcpy() */ +#if defined(__SSE2__) || defined(__AVX2__) +static inline unsigned char *chunk_memcpy_unaligned(unsigned char *out, const unsigned char *from, unsigned len) { +#if defined(__AVX2__) + unsigned sz = sizeof(__m256i); +#elif defined(__SSE2__) + unsigned sz = sizeof(__m128i); +#endif + unsigned rem = len % sz; + unsigned ilen; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. */ +#if defined(__AVX2__) + copy_32_bytes(out, from); +#elif defined(__SSE2__) + copy_16_bytes(out, from); +#endif + + len /= sz; + out += rem; + from += rem; + + for (ilen = 0; ilen < len; ilen++) { +#if defined(__AVX2__) + copy_32_bytes(out, from); +#elif defined(__SSE2__) + copy_16_bytes(out, from); +#endif + out += sz; + from += sz; + } + + return out; +} +#endif // __SSE2__ || __AVX2__ + + +#if defined(__SSE2__) || defined(__AVX2__) +/* SSE2/AVX2 *aligned* version of chunk_memcpy() */ +static inline unsigned char *chunk_memcpy_aligned(unsigned char *out, const unsigned char *from, unsigned len) { +#if defined(__AVX2__) + unsigned sz = sizeof(__m256i); + __m256i chunk; +#elif defined(__SSE2__) + unsigned sz = sizeof(__m128i); + __m128i chunk; +#endif + unsigned bytes_to_align = sz - (unsigned)(((uintptr_t)(const void *)(from)) % sz); + unsigned corrected_len = len - bytes_to_align; + unsigned rem = corrected_len % sz; + unsigned ilen; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has aligned access. 
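+
+     A worked example: with sz = 32 and from % 32 == 5, bytes_to_align = 27.
+     The unaligned copy below moves bytes 0..31, both pointers advance by
+     27, and from is then 32-byte aligned for the loop; bytes 27..31 are
+     simply written twice with the same data. The final rem bytes are
+     handled by copy_bytes() or chunk_memcpy() at the end.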
*/ +#if defined(__AVX2__) + chunk = _mm256_loadu_si256((__m256i *) from); + _mm256_storeu_si256((__m256i *) out, chunk); +#elif defined(__SSE2__) + chunk = _mm_loadu_si128((__m128i *) from); + _mm_storeu_si128((__m128i *) out, chunk); +#endif + out += bytes_to_align; + from += bytes_to_align; + + len = corrected_len / sz; + for (ilen = 0; ilen < len; ilen++) { +#if defined(__AVX2__) + chunk = _mm256_load_si256((__m256i *) from); /* *aligned* load */ + _mm256_storeu_si256((__m256i *) out, chunk); +#elif defined(__SSE2__) + chunk = _mm_load_si128((__m128i *) from); /* *aligned* load */ + _mm_storeu_si128((__m128i *) out, chunk); +#endif + out += sz; + from += sz; + } + + /* Copy remaining bytes */ + if (rem < 8) { + out = copy_bytes(out, from, rem); + } + else { + out = chunk_memcpy(out, from, rem); + } + + return out; +} +#endif // __AVX2__ || __SSE2__ + + +/* Byte by byte semantics: copy LEN bytes from FROM and write them to OUT. Return OUT + LEN. */ +unsigned char *fastcopy(unsigned char *out, const unsigned char *from, unsigned len) { + switch (len) { + case 32: + return copy_32_bytes(out, from); + case 16: + return copy_16_bytes(out, from); + case 8: + return copy_8_bytes(out, from); + default: { + } + } + if (len < 8) { + return copy_bytes(out, from, len); + } +#if defined(__SSE2__) + if (len < 16) { + return chunk_memcpy(out, from, len); + } +#if !defined(__AVX2__) + return chunk_memcpy_unaligned(out, from, len); +#else + if (len < 32) { + return chunk_memcpy_16(out, from, len); + } + return chunk_memcpy_unaligned(out, from, len); +#endif // !__AVX2__ +#endif // __SSE2__ + return chunk_memcpy(out, from, len); +} + + +/* Same as fastcopy() but without overwriting the source or destination when they overlap */ +unsigned char* safecopy(unsigned char *out, const unsigned char *from, unsigned len) { +#if defined(__AVX2__) + unsigned sz = sizeof(__m256i); +#elif defined(__SSE2__) + unsigned sz = sizeof(__m128i); +#else + unsigned sz = sizeof(uint64_t); +#endif + if (out - sz < from) { + for (; len; --len) { + *out++ = *from++; + } + return out; + } + else { + return fastcopy(out, from, len); + } +} diff --git a/c-blosc/blosc/fastcopy.h b/c-blosc/blosc/fastcopy.h new file mode 100644 index 0000000..4b0ca39 --- /dev/null +++ b/c-blosc/blosc/fastcopy.h @@ -0,0 +1,19 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + Creation date: 2018-01-03 + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +#ifndef BLOSC_FASTCOPY_H +#define BLOSC_FASTCOPY_H + +/* Same semantics as memcpy() */ +unsigned char *fastcopy(unsigned char *out, const unsigned char *from, unsigned len); + +/* Same as fastcopy() but without overwriting the source or destination when they overlap */ +unsigned char* safecopy(unsigned char *out, const unsigned char *from, unsigned len); + +#endif //BLOSC_FASTCOPY_H diff --git a/c-blosc/blosc/shuffle-avx2.c b/c-blosc/blosc/shuffle-avx2.c new file mode 100644 index 0000000..86e750b --- /dev/null +++ b/c-blosc/blosc/shuffle-avx2.c @@ -0,0 +1,757 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/ + +#include "shuffle-generic.h" +#include "shuffle-avx2.h" + +/* Make sure AVX2 is available for the compilation target and compiler. */ +#if !defined(__AVX2__) + #error AVX2 is not supported by the target architecture/platform and/or this compiler. +#endif + +#include <immintrin.h> + + +/* The next is useful for debugging purposes */ +#if 0 +#include <stdio.h> +#include <string.h> + +static void printymm(__m256i ymm0) +{ + uint8_t buf[32]; + + ((__m256i *)buf)[0] = ymm0; + printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n", + buf[0], buf[1], buf[2], buf[3], + buf[4], buf[5], buf[6], buf[7], + buf[8], buf[9], buf[10], buf[11], + buf[12], buf[13], buf[14], buf[15], + buf[16], buf[17], buf[18], buf[19], + buf[20], buf[21], buf[22], buf[23], + buf[24], buf[25], buf[26], buf[27], + buf[28], buf[29], buf[30], buf[31]); +} +#endif + +/* GCC doesn't include the split load/store intrinsics + needed for the tiled shuffle, so define them here. */ +#if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC) +static inline __m256i +__attribute__((__always_inline__)) +_mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr) +{ + return _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_loadu_si128(loaddr)), _mm_loadu_si128(hiaddr), 1); +} + +static inline void +__attribute__((__always_inline__)) +_mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a) +{ + _mm_storeu_si128(loaddr, _mm256_castsi256_si128(a)); + _mm_storeu_si128(hiaddr, _mm256_extracti128_si256(a, 1)); +} +#endif /* defined(__GNUC__) */ + +/* Routine optimized for shuffling a buffer for a type size of 2 bytes. */ +static void +shuffle2_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 2; + size_t j; + int k; + __m256i ymm0[2], ymm1[2]; + + /* Create the shuffle mask. + NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from + most to least significant (i.e., their order is reversed when compared to + loading the mask from an array). */ + const __m256i shmask = _mm256_set_epi8( + 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, + 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02, 0x00, + 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, + 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02, 0x00); + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Fetch 32 elements (64 bytes) then transpose bytes, words and double words. */ + for (k = 0; k < 2; k++) { + ymm0[k] = _mm256_loadu_si256((__m256i*)(src + (j * bytesoftype) + (k * sizeof(__m256i)))); + ymm1[k] = _mm256_shuffle_epi8(ymm0[k], shmask); + } + + ymm0[0] = _mm256_permute4x64_epi64(ymm1[0], 0xd8); + ymm0[1] = _mm256_permute4x64_epi64(ymm1[1], 0x8d); + + ymm1[0] = _mm256_blend_epi32(ymm0[0], ymm0[1], 0xf0); + ymm0[1] = _mm256_blend_epi32(ymm0[0], ymm0[1], 0x0f); + ymm1[1] = _mm256_permute4x64_epi64(ymm0[1], 0x4e); + + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 2; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (k * total_elements)), ymm1[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 4 bytes. */ +static void +shuffle4_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + __m256i ymm0[4], ymm1[4]; + + /* Create the shuffle mask.
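+
+     Read from least to most significant element, the mask below is the
+     index vector {0, 4, 1, 5, 2, 6, 3, 7}; _mm256_permutevar8x32_epi32
+     picks source dword mask[i] for result dword i, which fixes up the
+     cross-lane ordering that the in-lane unpack steps in the loop below
+     leave behind.
+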
+ NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from + most to least significant (i.e., their order is reversed when compared to + loading the mask from an array). */ + const __m256i mask = _mm256_set_epi32( + 0x07, 0x03, 0x06, 0x02, 0x05, 0x01, 0x04, 0x00); + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Fetch 32 elements (128 bytes) then transpose bytes and words. */ + for (j = 0; j < 4; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src + (i * bytesoftype) + (j * sizeof(__m256i)))); + ymm1[j] = _mm256_shuffle_epi32(ymm0[j], 0xd8); + ymm0[j] = _mm256_shuffle_epi32(ymm0[j], 0x8d); + ymm0[j] = _mm256_unpacklo_epi8(ymm1[j], ymm0[j]); + ymm1[j] = _mm256_shuffle_epi32(ymm0[j], 0x04e); + ymm0[j] = _mm256_unpacklo_epi16(ymm0[j], ymm1[j]); + } + /* Transpose double words */ + for (j = 0; j < 2; j++) { + ymm1[j*2] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + ymm1[j*2+1] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + /* Transpose quad words */ + for (j = 0; j < 2; j++) { + ymm0[j*2] = _mm256_unpacklo_epi64(ymm1[j], ymm1[j+2]); + ymm0[j*2+1] = _mm256_unpackhi_epi64(ymm1[j], ymm1[j+2]); + } + for (j = 0; j < 4; j++) { + ymm0[j] = _mm256_permutevar8x32_epi32(ymm0[j], mask); + } + /* Store the result vectors */ + uint8_t* const dest_for_ith_element = dest + i; + for (j = 0; j < 4; j++) { + _mm256_storeu_si256((__m256i*)(dest_for_ith_element + (j * total_elements)), ymm0[j]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 8 bytes. */ +static void +shuffle8_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t j; + int k, l; + __m256i ymm0[8], ymm1[8]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Fetch 32 elements (256 bytes) then transpose bytes. */ + for (k = 0; k < 8; k++) { + ymm0[k] = _mm256_loadu_si256((__m256i*)(src + (j * bytesoftype) + (k * sizeof(__m256i)))); + ymm1[k] = _mm256_shuffle_epi32(ymm0[k], 0x4e); + ymm1[k] = _mm256_unpacklo_epi8(ymm0[k], ymm1[k]); + } + /* Transpose words */ + for (k = 0, l = 0; k < 4; k++, l +=2) { + ymm0[k*2] = _mm256_unpacklo_epi16(ymm1[l], ymm1[l+1]); + ymm0[k*2+1] = _mm256_unpackhi_epi16(ymm1[l], ymm1[l+1]); + } + /* Transpose double words */ + for (k = 0, l = 0; k < 4; k++, l++) { + if (k == 2) l += 2; + ymm1[k*2] = _mm256_unpacklo_epi32(ymm0[l], ymm0[l+2]); + ymm1[k*2+1] = _mm256_unpackhi_epi32(ymm0[l], ymm0[l+2]); + } + /* Transpose quad words */ + for (k = 0; k < 4; k++) { + ymm0[k*2] = _mm256_unpacklo_epi64(ymm1[k], ymm1[k+4]); + ymm0[k*2+1] = _mm256_unpackhi_epi64(ymm1[k], ymm1[k+4]); + } + for(k = 0; k < 8; k++) { + ymm1[k] = _mm256_permute4x64_epi64(ymm0[k], 0x72); + ymm0[k] = _mm256_permute4x64_epi64(ymm0[k], 0xD8); + ymm0[k] = _mm256_unpacklo_epi16(ymm0[k], ymm1[k]); + } + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 8; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (k * total_elements)), ymm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 16 bytes. */ +static void +shuffle16_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t j; + int k, l; + __m256i ymm0[16], ymm1[16]; + + /* Create the shuffle mask. 
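+
+     Read from least to most significant byte, the mask below is
+     {0x00, 0x08, 0x01, 0x09, ..., 0x07, 0x0f} within each 128-bit lane;
+     _mm256_shuffle_epi8 uses it to interleave the low eight bytes of each
+     lane with the high eight, the last step of the byte transpose in the
+     loop below.
+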
+ NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from + most to least significant (i.e., their order is reversed when compared to + loading the mask from an array). */ + const __m256i shmask = _mm256_set_epi8( + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00, + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00); + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Fetch 32 elements (512 bytes) into 16 YMM registers. */ + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_loadu_si256((__m256i*)(src + (j * bytesoftype) + (k * sizeof(__m256i)))); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + ymm1[k*2] = _mm256_unpacklo_epi8(ymm0[l], ymm0[l+1]); + ymm1[k*2+1] = _mm256_unpackhi_epi8(ymm0[l], ymm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + ymm0[k*2] = _mm256_unpacklo_epi16(ymm1[l], ymm1[l+2]); + ymm0[k*2+1] = _mm256_unpackhi_epi16(ymm1[l], ymm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + ymm1[k*2] = _mm256_unpacklo_epi32(ymm0[l], ymm0[l+4]); + ymm1[k*2+1] = _mm256_unpackhi_epi32(ymm0[l], ymm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + ymm0[k*2] = _mm256_unpacklo_epi64(ymm1[k], ymm1[k+8]); + ymm0[k*2+1] = _mm256_unpackhi_epi64(ymm1[k], ymm1[k+8]); + } + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_permute4x64_epi64(ymm0[k], 0xd8); + ymm0[k] = _mm256_shuffle_epi8(ymm0[k], shmask); + } + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (k * total_elements)), ymm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size larger than 16 bytes. */ +static void +shuffle16_tiled_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t j; + int k, l; + __m256i ymm0[16], ymm1[16]; + + const lldiv_t vecs_per_el = lldiv(bytesoftype, sizeof(__m128i)); + + /* Create the shuffle mask. + NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from + most to least significant (i.e., their order is reversed when compared to + loading the mask from an array). */ + const __m256i shmask = _mm256_set_epi8( + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00, + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00); + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Advance the offset into the type by the vector size (in bytes), unless this is + the initial iteration and the type size is not a multiple of the vector size. + In that case, only advance by the number of bytes necessary so that the number + of remaining bytes in the type will be a multiple of the vector size. */ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el.rem > 0 ? 
vecs_per_el.rem : sizeof(__m128i))) { + + /* Fetch elements in groups of 512 bytes */ + const uint8_t* const src_with_offset = src + offset_into_type; + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_loadu2_m128i( + (__m128i*)(src_with_offset + (j + (2 * k) + 1) * bytesoftype), + (__m128i*)(src_with_offset + (j + (2 * k)) * bytesoftype)); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + ymm1[k*2] = _mm256_unpacklo_epi8(ymm0[l], ymm0[l+1]); + ymm1[k*2+1] = _mm256_unpackhi_epi8(ymm0[l], ymm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + ymm0[k*2] = _mm256_unpacklo_epi16(ymm1[l], ymm1[l+2]); + ymm0[k*2+1] = _mm256_unpackhi_epi16(ymm1[l], ymm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + ymm1[k*2] = _mm256_unpacklo_epi32(ymm0[l], ymm0[l+4]); + ymm1[k*2+1] = _mm256_unpackhi_epi32(ymm0[l], ymm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + ymm0[k*2] = _mm256_unpacklo_epi64(ymm1[k], ymm1[k+8]); + ymm0[k*2+1] = _mm256_unpackhi_epi64(ymm1[k], ymm1[k+8]); + } + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_permute4x64_epi64(ymm0[k], 0xd8); + ymm0[k] = _mm256_shuffle_epi8(ymm0[k], shmask); + } + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (total_elements * (offset_into_type + k))), ymm0[k]); + } + } + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 2 bytes. */ +static void +unshuffle2_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 2; + size_t i; + int j; + __m256i ymm0[2], ymm1[2]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Load 32 elements (64 bytes) into 2 YMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 2; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 2; j++) { + ymm0[j] = _mm256_permute4x64_epi64(ymm0[j], 0xd8); + } + /* Compute the low 64 bytes */ + ymm1[0] = _mm256_unpacklo_epi8(ymm0[0], ymm0[1]); + /* Compute the hi 64 bytes */ + ymm1[1] = _mm256_unpackhi_epi8(ymm0[0], ymm0[1]); + /* Store the result vectors in proper order */ + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (0 * sizeof(__m256i))), ymm1[0]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (1 * sizeof(__m256i))), ymm1[1]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 4 bytes. */ +static void +unshuffle4_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + __m256i ymm0[4], ymm1[4]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Load 32 elements (128 bytes) into 4 YMM registers. 
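+
+     In plane terms: with total_elements = 1000, for instance, the four
+     bytes of element i live at src[i], src[1000 + i], src[2000 + i] and
+     src[3000 + i]; each of the four loads below grabs 32 consecutive
+     bytes from one of those planes.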
*/ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 4; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 2; j++) { + /* Compute the low 64 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 64 bytes */ + ymm1[2+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 2; j++) { + /* Compute the low 64 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 64 bytes */ + ymm0[2+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + ymm1[0] = _mm256_permute2x128_si256(ymm0[0], ymm0[2], 0x20); + ymm1[1] = _mm256_permute2x128_si256(ymm0[1], ymm0[3], 0x20); + ymm1[2] = _mm256_permute2x128_si256(ymm0[0], ymm0[2], 0x31); + ymm1[3] = _mm256_permute2x128_si256(ymm0[1], ymm0[3], 0x31); + + /* Store the result vectors in proper order */ + for (j = 0; j < 4; j++) { + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (j * sizeof(__m256i))), ymm1[j]); + } + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 8 bytes. */ +static void +unshuffle8_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t i; + int j; + __m256i ymm0[8], ymm1[8]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Fetch 32 elements (256 bytes) into 8 YMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 8; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[4+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle words */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[4+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + for (j = 0; j < 8; j++) { + ymm0[j] = _mm256_permute4x64_epi64(ymm0[j], 0xd8); + } + + /* Shuffle 4-byte dwords */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[4+j] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + + /* Store the result vectors in proper order */ + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (0 * sizeof(__m256i))), ymm1[0]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (1 * sizeof(__m256i))), ymm1[2]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (2 * sizeof(__m256i))), ymm1[1]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (3 * sizeof(__m256i))), ymm1[3]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (4 * sizeof(__m256i))), ymm1[4]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (5 * sizeof(__m256i))), ymm1[6]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (6 * sizeof(__m256i))), ymm1[5]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (7 * sizeof(__m256i))), ymm1[7]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 16 bytes. 
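+
+     The scattered store order at the end of this routine (ymm1[0], 4, 2,
+     6, 1, 5, 3, 7, ...) is the bit-reversal permutation left behind by the
+     log2(16) = 4 rounds of unpacking; storing through it puts the rebuilt
+     elements back in linear order.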
*/ +static void +unshuffle16_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t i; + int j; + __m256i ymm0[16], ymm1[16]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Fetch 32 elements (512 bytes) into 16 YMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 16; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi64(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi64(ymm1[j*2], ymm1[j*2+1]); + } + + for (j = 0; j < 8; j++) { + ymm1[j] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x20); + ymm1[j+8] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x31); + } + + /* Store the result vectors in proper order */ + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (0 * sizeof(__m256i))), ymm1[0]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (1 * sizeof(__m256i))), ymm1[4]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (2 * sizeof(__m256i))), ymm1[2]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (3 * sizeof(__m256i))), ymm1[6]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (4 * sizeof(__m256i))), ymm1[1]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (5 * sizeof(__m256i))), ymm1[5]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (6 * sizeof(__m256i))), ymm1[3]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (7 * sizeof(__m256i))), ymm1[7]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (8 * sizeof(__m256i))), ymm1[8]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (9 * sizeof(__m256i))), ymm1[12]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (10 * sizeof(__m256i))), ymm1[10]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (11 * sizeof(__m256i))), ymm1[14]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (12 * sizeof(__m256i))), ymm1[9]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (13 * sizeof(__m256i))), ymm1[13]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (14 * sizeof(__m256i))), ymm1[11]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (15 * sizeof(__m256i))), ymm1[15]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size larger than 16 bytes. 
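+
+     A worked example of the offset walk: for bytesoftype = 24,
+     lldiv(24, 16) gives rem = 8, so offset_into_type visits 0 (advancing
+     by rem = 8) and then 8 (advancing by 16). Bytes 8..15 of each element
+     are processed twice with identical results, which keeps every pass a
+     whole vector wide.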
*/ +static void +unshuffle16_tiled_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t i; + int j; + __m256i ymm0[16], ymm1[16]; + + const lldiv_t vecs_per_el = lldiv(bytesoftype, sizeof(__m128i)); + + /* The unshuffle loops are inverted (compared to shuffle_tiled16_avx2) + to optimize cache utilization. */ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el.rem > 0 ? vecs_per_el.rem : sizeof(__m128i))) { + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Load the first 16 bytes of 32 adjacent elements (512 bytes) into 16 YMM registers */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 16; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (total_elements * (offset_into_type + j)))); + } + + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi64(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi64(ymm1[j*2], ymm1[j*2+1]); + } + + for (j = 0; j < 8; j++) { + ymm1[j] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x20); + ymm1[j+8] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x31); + } + + /* Store the result vectors in proper order */ + const uint8_t* const dest_with_offset = dest + offset_into_type; + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x01) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x00) * bytesoftype), ymm1[0]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x03) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x02) * bytesoftype), ymm1[4]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x05) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x04) * bytesoftype), ymm1[2]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x07) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x06) * bytesoftype), ymm1[6]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x09) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x08) * bytesoftype), ymm1[1]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x0b) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x0a) * bytesoftype), ymm1[5]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x0d) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x0c) * bytesoftype), ymm1[3]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x0f) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x0e) * bytesoftype), ymm1[7]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x11) * bytesoftype), + (__m128i*)(dest_with_offset 
+ (i + 0x10) * bytesoftype), ymm1[8]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x13) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x12) * bytesoftype), ymm1[12]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x15) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x14) * bytesoftype), ymm1[10]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x17) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x16) * bytesoftype), ymm1[14]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x19) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x18) * bytesoftype), ymm1[9]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x1b) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x1a) * bytesoftype), ymm1[13]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x1d) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x1c) * bytesoftype), ymm1[11]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x1f) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x1e) * bytesoftype), ymm1[15]); + } + } +} + +/* Shuffle a block. This can never fail. */ +void +shuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m256i); + + /* If the block size is too small to be vectorized, + use the generic implementation. */ + if (blocksize < vectorized_chunk_size) { + shuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. The vectorized shuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion. */ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + /* Optimized shuffle implementations */ + switch (bytesoftype) + { + case 2: + shuffle2_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + shuffle4_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + shuffle8_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + shuffle16_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + /* For types larger than 16 bytes, use the AVX2 tiled shuffle. */ + if (bytesoftype > sizeof(__m128i)) { + shuffle16_tiled_avx2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized shuffle */ + shuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. */ + if (vectorizable_bytes < blocksize) { + shuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} + +/* Unshuffle a block. This can never fail. */ +void +unshuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m256i); + + /* If the block size is too small to be vectorized, + use the generic implementation. 
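+
+     Worked numbers: for bytesoftype = 4 the vectorized chunk is
+     4 * 32 = 128 bytes, so a 100-byte block falls through to the generic
+     code, while a 1000-byte block is split into vectorizable_bytes = 896
+     (224 elements) for the AVX2 path plus a 104-byte tail for
+     unshuffle_generic_inline() at the bottom of this function.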
*/ + if (blocksize < vectorized_chunk_size) { + unshuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. The vectorized unshuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion. */ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + /* Optimized unshuffle implementations */ + switch (bytesoftype) + { + case 2: + unshuffle2_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + unshuffle4_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + unshuffle8_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + unshuffle16_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + /* For types larger than 16 bytes, use the AVX2 tiled unshuffle. */ + if (bytesoftype > sizeof(__m128i)) { + unshuffle16_tiled_avx2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized unshuffle */ + unshuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. */ + if (vectorizable_bytes < blocksize) { + unshuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} diff --git a/c-blosc/blosc/shuffle-avx2.h b/c-blosc/blosc/shuffle-avx2.h new file mode 100644 index 0000000..c638fb6 --- /dev/null +++ b/c-blosc/blosc/shuffle-avx2.h @@ -0,0 +1,36 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +/* AVX2-accelerated shuffle/unshuffle routines. */ + +#ifndef SHUFFLE_AVX2_H +#define SHUFFLE_AVX2_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + AVX2-accelerated shuffle routine. +*/ +BLOSC_NO_EXPORT void shuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +/** + AVX2-accelerated unshuffle routine. +*/ +BLOSC_NO_EXPORT void unshuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_AVX2_H */ diff --git a/c-blosc/blosc/shuffle-generic.c b/c-blosc/blosc/shuffle-generic.c new file mode 100644 index 0000000..46c6e83 --- /dev/null +++ b/c-blosc/blosc/shuffle-generic.c @@ -0,0 +1,25 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "shuffle-generic.h" + +/* Shuffle a block. This can never fail. 
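+
+   A minimal round-trip sketch (buffer names are illustrative):
+
+     uint8_t src[1024], tmp[1024], out[1024];
+     // ... fill src with 128 8-byte elements ...
+     shuffle_generic(8, sizeof(src), src, tmp);    // split into byte planes
+     unshuffle_generic(8, sizeof(src), tmp, out);  // reassemble elements
+     // memcmp(src, out, sizeof(src)) == 0 holds after the round trip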
*/ +void shuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + /* Non-optimized shuffle */ + shuffle_generic_inline(bytesoftype, 0, blocksize, _src, _dest); +} + +/* Unshuffle a block. This can never fail. */ +void unshuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + /* Non-optimized unshuffle */ + unshuffle_generic_inline(bytesoftype, 0, blocksize, _src, _dest); +} diff --git a/c-blosc/blosc/shuffle-generic.h b/c-blosc/blosc/shuffle-generic.h new file mode 100644 index 0000000..1319855 --- /dev/null +++ b/c-blosc/blosc/shuffle-generic.h @@ -0,0 +1,99 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +/* Generic (non-hardware-accelerated) shuffle/unshuffle routines. + These are used when hardware-accelerated functions aren't available + for a particular platform; they are also used by the hardware- + accelerated functions to handle any remaining elements in a block + which isn't a multiple of the hardware's vector size. */ + +#ifndef SHUFFLE_GENERIC_H +#define SHUFFLE_GENERIC_H + +#include "blosc-common.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + Generic (non-hardware-accelerated) shuffle routine. + This is the pure element-copying nested loop. It is used by the + generic shuffle implementation and also by the vectorized shuffle + implementations to process any remaining elements in a block which + is not a multiple of (type_size * vector_size). +*/ +static void shuffle_generic_inline(const size_t type_size, + const size_t vectorizable_blocksize, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + size_t i, j; + /* Calculate the number of elements in the block. */ + const size_t neblock_quot = blocksize / type_size; + const size_t neblock_rem = blocksize % type_size; + const size_t vectorizable_elements = vectorizable_blocksize / type_size; + + + /* Non-optimized shuffle */ + for (j = 0; j < type_size; j++) { + for (i = vectorizable_elements; i < (size_t)neblock_quot; i++) { + _dest[j*neblock_quot+i] = _src[i*type_size+j]; + } + } + + /* Copy any leftover bytes in the block without shuffling them. */ + memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem), neblock_rem); +} + +/** + Generic (non-hardware-accelerated) unshuffle routine. + This is the pure element-copying nested loop. It is used by the + generic unshuffle implementation and also by the vectorized unshuffle + implementations to process any remaining elements in a block which + is not a multiple of (type_size * vector_size). +*/ +static void unshuffle_generic_inline(const size_t type_size, + const size_t vectorizable_blocksize, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + size_t i, j; + + /* Calculate the number of elements in the block. 
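+
+     Worked numbers: for blocksize = 1000 and type_size = 6,
+     neblock_quot = 166 and neblock_rem = 4; the loop below rebuilds 166
+     elements (996 bytes) and the trailing memcpy() moves the last 4
+     bytes across unchanged.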
*/ + const size_t neblock_quot = blocksize / type_size; + const size_t neblock_rem = blocksize % type_size; + const size_t vectorizable_elements = vectorizable_blocksize / type_size; + + /* Non-optimized unshuffle */ + for (i = vectorizable_elements; i < (size_t)neblock_quot; i++) { + for (j = 0; j < type_size; j++) { + _dest[i*type_size+j] = _src[j*neblock_quot+i]; + } + } + + /* Copy any leftover bytes in the block without unshuffling them. */ + memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem), neblock_rem); +} + +/** + Generic (non-hardware-accelerated) shuffle routine. +*/ +BLOSC_NO_EXPORT void shuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +/** + Generic (non-hardware-accelerated) unshuffle routine. +*/ +BLOSC_NO_EXPORT void unshuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_GENERIC_H */ diff --git a/c-blosc/blosc/shuffle-sse2.c b/c-blosc/blosc/shuffle-sse2.c new file mode 100644 index 0000000..c829ffb --- /dev/null +++ b/c-blosc/blosc/shuffle-sse2.c @@ -0,0 +1,626 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "shuffle-generic.h" +#include "shuffle-sse2.h" + +/* Make sure SSE2 is available for the compilation target and compiler. */ +#if !defined(__SSE2__) + #error SSE2 is not supported by the target architecture/platform and/or this compiler. +#endif + +#include <emmintrin.h> + + +/* The next is useful for debugging purposes */ +#if 0 +#include <stdio.h> +#include <string.h> + +static void printxmm(__m128i xmm0) +{ + uint8_t buf[16]; + + ((__m128i *)buf)[0] = xmm0; + printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n", + buf[0], buf[1], buf[2], buf[3], + buf[4], buf[5], buf[6], buf[7], + buf[8], buf[9], buf[10], buf[11], + buf[12], buf[13], buf[14], buf[15]); +} +#endif + + +/* Routine optimized for shuffling a buffer for a type size of 2 bytes. */ +static void +shuffle2_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 2; + size_t j; + int k; + uint8_t* dest_for_jth_element; + __m128i xmm0[2], xmm1[2]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Fetch 16 elements (32 bytes) then transpose bytes, words and double words.
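+
+     In plane terms: the low byte of element i ends up at dest[i] and the
+     high byte at dest[total_elements + i]; each iteration below fills 16
+     bytes of each plane from 16 consecutive source elements.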
*/ + for (k = 0; k < 2; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src + (j * bytesoftype) + (k * sizeof(__m128i)))); + xmm0[k] = _mm_shufflelo_epi16(xmm0[k], 0xd8); + xmm0[k] = _mm_shufflehi_epi16(xmm0[k], 0xd8); + xmm0[k] = _mm_shuffle_epi32(xmm0[k], 0xd8); + xmm1[k] = _mm_shuffle_epi32(xmm0[k], 0x4e); + xmm0[k] = _mm_unpacklo_epi8(xmm0[k], xmm1[k]); + xmm0[k] = _mm_shuffle_epi32(xmm0[k], 0xd8); + xmm1[k] = _mm_shuffle_epi32(xmm0[k], 0x4e); + xmm0[k] = _mm_unpacklo_epi16(xmm0[k], xmm1[k]); + xmm0[k] = _mm_shuffle_epi32(xmm0[k], 0xd8); + } + /* Transpose quad words */ + for (k = 0; k < 1; k++) { + xmm1[k*2] = _mm_unpacklo_epi64(xmm0[k], xmm0[k+1]); + xmm1[k*2+1] = _mm_unpackhi_epi64(xmm0[k], xmm0[k+1]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 2; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (k * total_elements)), xmm1[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 4 bytes. */ +static void +shuffle4_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + uint8_t* dest_for_ith_element; + __m128i xmm0[4], xmm1[4]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Fetch 16 elements (64 bytes) then transpose bytes and words. */ + for (j = 0; j < 4; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src + (i * bytesoftype) + (j * sizeof(__m128i)))); + xmm1[j] = _mm_shuffle_epi32(xmm0[j], 0xd8); + xmm0[j] = _mm_shuffle_epi32(xmm0[j], 0x8d); + xmm0[j] = _mm_unpacklo_epi8(xmm1[j], xmm0[j]); + xmm1[j] = _mm_shuffle_epi32(xmm0[j], 0x04e); + xmm0[j] = _mm_unpacklo_epi16(xmm0[j], xmm1[j]); + } + /* Transpose double words */ + for (j = 0; j < 2; j++) { + xmm1[j*2] = _mm_unpacklo_epi32(xmm0[j*2], xmm0[j*2+1]); + xmm1[j*2+1] = _mm_unpackhi_epi32(xmm0[j*2], xmm0[j*2+1]); + } + /* Transpose quad words */ + for (j = 0; j < 2; j++) { + xmm0[j*2] = _mm_unpacklo_epi64(xmm1[j], xmm1[j+2]); + xmm0[j*2+1] = _mm_unpackhi_epi64(xmm1[j], xmm1[j+2]); + } + /* Store the result vectors */ + dest_for_ith_element = dest + i; + for (j = 0; j < 4; j++) { + _mm_storeu_si128((__m128i*)(dest_for_ith_element + (j * total_elements)), xmm0[j]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 8 bytes. */ +static void +shuffle8_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t j; + int k, l; + uint8_t* dest_for_jth_element; + __m128i xmm0[8], xmm1[8]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Fetch 16 elements (128 bytes) then transpose bytes. 
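+
+     The pattern generalizes: transposing the 16x8 byte matrix held in the
+     registers takes log2(16) = 4 unpack stages (bytes, words, double
+     words, quad words), each stage doubling the length of the runs of
+     bytes that belong to the same plane.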
*/ + for (k = 0; k < 8; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src + (j * bytesoftype) + (k * sizeof(__m128i)))); + xmm1[k] = _mm_shuffle_epi32(xmm0[k], 0x4e); + xmm1[k] = _mm_unpacklo_epi8(xmm0[k], xmm1[k]); + } + /* Transpose words */ + for (k = 0, l = 0; k < 4; k++, l +=2) { + xmm0[k*2] = _mm_unpacklo_epi16(xmm1[l], xmm1[l+1]); + xmm0[k*2+1] = _mm_unpackhi_epi16(xmm1[l], xmm1[l+1]); + } + /* Transpose double words */ + for (k = 0, l = 0; k < 4; k++, l++) { + if (k == 2) l += 2; + xmm1[k*2] = _mm_unpacklo_epi32(xmm0[l], xmm0[l+2]); + xmm1[k*2+1] = _mm_unpackhi_epi32(xmm0[l], xmm0[l+2]); + } + /* Transpose quad words */ + for (k = 0; k < 4; k++) { + xmm0[k*2] = _mm_unpacklo_epi64(xmm1[k], xmm1[k+4]); + xmm0[k*2+1] = _mm_unpackhi_epi64(xmm1[k], xmm1[k+4]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 8; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (k * total_elements)), xmm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 16 bytes. */ +static void +shuffle16_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t j; + int k, l; + uint8_t* dest_for_jth_element; + __m128i xmm0[16], xmm1[16]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Fetch 16 elements (256 bytes). */ + for (k = 0; k < 16; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src + (j * bytesoftype) + (k * sizeof(__m128i)))); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + xmm1[k*2] = _mm_unpacklo_epi8(xmm0[l], xmm0[l+1]); + xmm1[k*2+1] = _mm_unpackhi_epi8(xmm0[l], xmm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + xmm0[k*2] = _mm_unpacklo_epi16(xmm1[l], xmm1[l+2]); + xmm0[k*2+1] = _mm_unpackhi_epi16(xmm1[l], xmm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + xmm1[k*2] = _mm_unpacklo_epi32(xmm0[l], xmm0[l+4]); + xmm1[k*2+1] = _mm_unpackhi_epi32(xmm0[l], xmm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + xmm0[k*2] = _mm_unpacklo_epi64(xmm1[k], xmm1[k+8]); + xmm0[k*2+1] = _mm_unpackhi_epi64(xmm1[k], xmm1[k+8]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (k * total_elements)), xmm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size larger than 16 bytes. */ +static void +shuffle16_tiled_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t j; + const size_t vecs_per_el_rem = bytesoftype % sizeof(__m128i); + int k, l; + uint8_t* dest_for_jth_element; + __m128i xmm0[16], xmm1[16]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Advance the offset into the type by the vector size (in bytes), unless this is + the initial iteration and the type size is not a multiple of the vector size. + In that case, only advance by the number of bytes necessary so that the number + of remaining bytes in the type will be a multiple of the vector size. */ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el_rem > 0 ? 
vecs_per_el_rem : sizeof(__m128i))) { + + /* Fetch elements in groups of 256 bytes */ + const uint8_t* const src_with_offset = src + offset_into_type; + for (k = 0; k < 16; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src_with_offset + (j + k) * bytesoftype)); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + xmm1[k*2] = _mm_unpacklo_epi8(xmm0[l], xmm0[l+1]); + xmm1[k*2+1] = _mm_unpackhi_epi8(xmm0[l], xmm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + xmm0[k*2] = _mm_unpacklo_epi16(xmm1[l], xmm1[l+2]); + xmm0[k*2+1] = _mm_unpackhi_epi16(xmm1[l], xmm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + xmm1[k*2] = _mm_unpacklo_epi32(xmm0[l], xmm0[l+4]); + xmm1[k*2+1] = _mm_unpackhi_epi32(xmm0[l], xmm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + xmm0[k*2] = _mm_unpacklo_epi64(xmm1[k], xmm1[k+8]); + xmm0[k*2+1] = _mm_unpackhi_epi64(xmm1[k], xmm1[k+8]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (total_elements * (offset_into_type + k))), xmm0[k]); + } + } + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 2 bytes. */ +static void +unshuffle2_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 2; + size_t i; + int j; + __m128i xmm0[2], xmm1[2]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (32 bytes) into 2 XMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 2; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + /* Compute the low 32 bytes */ + xmm1[0] = _mm_unpacklo_epi8(xmm0[0], xmm0[1]); + /* Compute the hi 32 bytes */ + xmm1[1] = _mm_unpackhi_epi8(xmm0[0], xmm0[1]); + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm1[1]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 4 bytes. */ +static void +unshuffle4_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + __m128i xmm0[4], xmm1[4]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (64 bytes) into 4 XMM registers. 
*/ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 4; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 2; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi8(xmm0[j*2], xmm0[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[2+j] = _mm_unpackhi_epi8(xmm0[j*2], xmm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 2; j++) { + /* Compute the low 32 bytes */ + xmm0[j] = _mm_unpacklo_epi16(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm0[2+j] = _mm_unpackhi_epi16(xmm1[j*2], xmm1[j*2+1]); + } + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm0[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm0[2]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (2 * sizeof(__m128i))), xmm0[1]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (3 * sizeof(__m128i))), xmm0[3]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 8 bytes. */ +static void +unshuffle8_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t i; + int j; + __m128i xmm0[8], xmm1[8]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (128 bytes) into 8 XMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 8; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi8(xmm0[j*2], xmm0[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[4+j] = _mm_unpackhi_epi8(xmm0[j*2], xmm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + xmm0[j] = _mm_unpacklo_epi16(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm0[4+j] = _mm_unpackhi_epi16(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi32(xmm0[j*2], xmm0[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[4+j] = _mm_unpackhi_epi32(xmm0[j*2], xmm0[j*2+1]); + } + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm1[4]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (2 * sizeof(__m128i))), xmm1[2]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (3 * sizeof(__m128i))), xmm1[6]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (4 * sizeof(__m128i))), xmm1[1]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (5 * sizeof(__m128i))), xmm1[5]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (6 * sizeof(__m128i))), xmm1[3]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (7 * sizeof(__m128i))), xmm1[7]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 16 bytes. 
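+ As with the narrower type sizes, this is a 16x16 byte-matrix transpose done in log2(16) = 4 unpack stages (8-, 16-, 32- and then 64-bit granularity).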
*/ +static void +unshuffle16_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t i; + int j; + __m128i xmm1[16], xmm2[16]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (256 bytes) into 16 XMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 16; j++) { + xmm1[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi8(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi8(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi16(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi16(xmm2[j*2], xmm2[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi32(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi32(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi64(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi64(xmm2[j*2], xmm2[j*2+1]); + } + + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm1[8]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (2 * sizeof(__m128i))), xmm1[4]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (3 * sizeof(__m128i))), xmm1[12]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (4 * sizeof(__m128i))), xmm1[2]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (5 * sizeof(__m128i))), xmm1[10]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (6 * sizeof(__m128i))), xmm1[6]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (7 * sizeof(__m128i))), xmm1[14]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (8 * sizeof(__m128i))), xmm1[1]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (9 * sizeof(__m128i))), xmm1[9]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (10 * sizeof(__m128i))), xmm1[5]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (11 * sizeof(__m128i))), xmm1[13]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (12 * sizeof(__m128i))), xmm1[3]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (13 * sizeof(__m128i))), xmm1[11]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (14 * sizeof(__m128i))), xmm1[7]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (15 * sizeof(__m128i))), xmm1[15]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size larger than 16 bytes. */ +static void +unshuffle16_tiled_sse2(uint8_t* const dest, const uint8_t* const orig, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t i; + const size_t vecs_per_el_rem = bytesoftype % sizeof(__m128i); + + int j; + uint8_t* dest_with_offset; + __m128i xmm1[16], xmm2[16]; + + /* The unshuffle loops are inverted (compared to shuffle16_tiled_sse2) + to optimize cache utilization.
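+ Making offset_into_type the outer loop means each pass streams sequentially through one vector-wide column of shuffled planes in orig, at the cost of revisiting dest once per column.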
*/ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el_rem > 0 ? vecs_per_el_rem : sizeof(__m128i))) { + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 vectors (256 bytes) into 16 XMM registers */ + const uint8_t* const src_for_ith_element = orig + i; + for (j = 0; j < 16; j++) { + xmm1[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (total_elements * (offset_into_type + j)))); + } + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi8(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi8(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi16(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi16(xmm2[j*2], xmm2[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi32(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi32(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi64(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi64(xmm2[j*2], xmm2[j*2+1]); + } + + /* Store the result vectors in proper order */ + dest_with_offset = dest + offset_into_type; + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 0) * bytesoftype), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 1) * bytesoftype), xmm1[8]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 2) * bytesoftype), xmm1[4]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 3) * bytesoftype), xmm1[12]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 4) * bytesoftype), xmm1[2]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 5) * bytesoftype), xmm1[10]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 6) * bytesoftype), xmm1[6]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 7) * bytesoftype), xmm1[14]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 8) * bytesoftype), xmm1[1]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 9) * bytesoftype), xmm1[9]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 10) * bytesoftype), xmm1[5]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 11) * bytesoftype), xmm1[13]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 12) * bytesoftype), xmm1[3]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 13) * bytesoftype), xmm1[11]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 14) * bytesoftype), xmm1[7]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 15) * bytesoftype), xmm1[15]); + } + } +} + +/* Shuffle a block. This can never fail. */ +void +shuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m128i); + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. The vectorized shuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion.
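+ For example, assuming bytesoftype = 4: the vectorized chunk is 4 * 16 = 64 bytes, so a 1000-byte block splits into 960 vectorizable bytes (240 elements) plus a 40-byte tail for the generic code.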
*/ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + /* If the block size is too small to be vectorized, + use the generic implementation. */ + if (blocksize < vectorized_chunk_size) { + shuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* Optimized shuffle implementations */ + switch (bytesoftype) + { + case 2: + shuffle2_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + shuffle4_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + shuffle8_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + shuffle16_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + if (bytesoftype > sizeof(__m128i)) { + shuffle16_tiled_sse2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized shuffle */ + shuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. */ + if (vectorizable_bytes < blocksize) { + shuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} + +/* Unshuffle a block. This can never fail. */ +void +unshuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m128i); + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. The vectorized unshuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion. */ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + + /* If the block size is too small to be vectorized, + use the generic implementation. */ + if (blocksize < vectorized_chunk_size) { + unshuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* Optimized unshuffle implementations */ + switch (bytesoftype) + { + case 2: + unshuffle2_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + unshuffle4_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + unshuffle8_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + unshuffle16_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + if (bytesoftype > sizeof(__m128i)) { + unshuffle16_tiled_sse2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized unshuffle */ + unshuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. 
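+ (unshuffle_generic_inline is given both the already-handled byte count and the total blocksize, so it should only need to walk the trailing bytes.)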
*/ + if (vectorizable_bytes < blocksize) { + unshuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} diff --git a/c-blosc/blosc/shuffle-sse2.h b/c-blosc/blosc/shuffle-sse2.h new file mode 100644 index 0000000..63263a1 --- /dev/null +++ b/c-blosc/blosc/shuffle-sse2.h @@ -0,0 +1,36 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +/* SSE2-accelerated shuffle/unshuffle routines. */ + +#ifndef SHUFFLE_SSE2_H +#define SHUFFLE_SSE2_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + SSE2-accelerated shuffle routine. +*/ +BLOSC_NO_EXPORT void shuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +/** + SSE2-accelerated unshuffle routine. +*/ +BLOSC_NO_EXPORT void unshuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_SSE2_H */ diff --git a/c-blosc/blosc/shuffle.c b/c-blosc/blosc/shuffle.c new file mode 100644 index 0000000..2d3524b --- /dev/null +++ b/c-blosc/blosc/shuffle.c @@ -0,0 +1,442 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + Creation date: 2009-05-20 + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "shuffle.h" +#include "shuffle-generic.h" +#include "bitshuffle-generic.h" +#include <stdio.h> /* printf */ +#include <stdlib.h> /* getenv */ +#include <string.h> /* memcpy */ + +/* Visual Studio < 2013 does not have stdbool.h so here is a replacement: */ +#if defined __STDC__ && defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +/* have a C99 compiler */ +typedef _Bool bool; +#else +/* do not have a C99 compiler */ +typedef unsigned char bool; +#endif + + +#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) +#define HAVE_CPU_FEAT_INTRIN +#endif + +/* Include hardware-accelerated shuffle/unshuffle routines based on + the target architecture. Note that a target architecture may support + more than one type of acceleration! */ +#if defined(SHUFFLE_AVX2_ENABLED) + #include "shuffle-avx2.h" + #include "bitshuffle-avx2.h" +#endif /* defined(SHUFFLE_AVX2_ENABLED) */ + +#if defined(SHUFFLE_SSE2_ENABLED) + #include "shuffle-sse2.h" + #include "bitshuffle-sse2.h" +#endif /* defined(SHUFFLE_SSE2_ENABLED) */ + + +/* Define function pointer types for shuffle/unshuffle routines. */ +typedef void(*shuffle_func)(const size_t, const size_t, const uint8_t*, const uint8_t*); +typedef void(*unshuffle_func)(const size_t, const size_t, const uint8_t*, const uint8_t*); +typedef int64_t(*bitshuffle_func)(void*, void*, const size_t, const size_t, void*); +typedef int64_t(*bitunshuffle_func)(void*, void*, const size_t, const size_t, void*); + +/* An implementation of shuffle/unshuffle routines. */ +typedef struct shuffle_implementation { + /* Name of this implementation. */ + const char* name; + /* Function pointer to the shuffle routine for this implementation. */ + shuffle_func shuffle; + /* Function pointer to the unshuffle routine for this implementation.
*/ + unshuffle_func unshuffle; + /* Function pointer to the bitshuffle routine for this implementation. */ + bitshuffle_func bitshuffle; + /* Function pointer to the bitunshuffle routine for this implementation. */ + bitunshuffle_func bitunshuffle; +} shuffle_implementation_t; + +typedef enum { + BLOSC_HAVE_NOTHING = 0, + BLOSC_HAVE_SSE2 = 1, + BLOSC_HAVE_AVX2 = 2 +} blosc_cpu_features; + +/* Detect hardware and set function pointers to the best shuffle/unshuffle + implementations supported by the host processor. */ +#if defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED) /* Intel/i686 */ + +/* Disabled the __builtin_cpu_supports() call, as it has issues with + new versions of gcc (like 5.3.1 in forthcoming ubuntu/xenial: + "undefined symbol: __cpu_model"). + For a similar report, see: + https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/ZM2L65WIZEEQHHLFERZYD5FAG7QY2OGB/ +*/ +#if defined(HAVE_CPU_FEAT_INTRIN) && 0 +static blosc_cpu_features blosc_get_cpu_features(void) { + blosc_cpu_features cpu_features = BLOSC_HAVE_NOTHING; + if (__builtin_cpu_supports("sse2")) { + cpu_features |= BLOSC_HAVE_SSE2; + } + if (__builtin_cpu_supports("avx2")) { + cpu_features |= BLOSC_HAVE_AVX2; + } + return cpu_features; +} +#else + +#if defined(_MSC_VER) && !defined(__clang__) + #include <intrin.h> /* Needed for __cpuid */ + +/* _xgetbv is only supported by VS2010 SP1 and newer versions of VS. */ +#if _MSC_FULL_VER >= 160040219 + #include <immintrin.h> /* Needed for _xgetbv */ +#elif defined(_M_IX86) + +/* Implement _xgetbv for VS2008 and VS2010 RTM with 32-bit (x86) targets. */ + +static uint64_t _xgetbv(uint32_t xcr) { + uint32_t xcr0, xcr1; + __asm { + mov ecx, xcr + _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 + mov xcr0, eax + mov xcr1, edx + } + return ((uint64_t)xcr1 << 32) | xcr0; +} + +#elif defined(_M_X64) + +/* Implement _xgetbv for VS2008 and VS2010 RTM with 64-bit (x64) targets. + These compilers don't support any of the newer acceleration ISAs + (e.g., AVX2) supported by blosc, and all x64 hardware supports SSE2 + which means we can get away with returning a hard-coded value from + this implementation of _xgetbv. */ + +static __inline uint64_t _xgetbv(uint32_t xcr) { + /* A 64-bit OS must have XMM save support. */ + return (xcr == 0 ? (1UL << 1) : 0UL); +} + +#else + +/* Hardware detection for any other MSVC targets (e.g., ARM) + isn't implemented at this time. */ +#error This version of c-blosc only supports x86 and x64 targets with MSVC. + +#endif /* _MSC_FULL_VER >= 160040219 */ + +#else + +/* Implement the __cpuid and __cpuidex intrinsics for GCC, Clang, + and others using inline assembly. */ +__attribute__((always_inline)) +static inline void +__cpuidex(int32_t cpuInfo[4], int32_t function_id, int32_t subfunction_id) { + __asm__ __volatile__ ( +# if defined(__i386__) && defined (__PIC__) + /* Can't clobber ebx with PIC running under 32-bit, so it needs to be manually restored. + https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + */ + "movl %%ebx, %%edi\n\t" + "cpuid\n\t" + "xchgl %%ebx, %%edi": + "=D" (cpuInfo[1]), +#else + "cpuid": + "=b" (cpuInfo[1]), +#endif /* defined(__i386) && defined(__PIC__) */ + "=a" (cpuInfo[0]), + "=c" (cpuInfo[2]), + "=d" (cpuInfo[3]) : + "a" (function_id), "c" (subfunction_id) + ); +} + +#define __cpuid(cpuInfo, function_id) __cpuidex(cpuInfo, function_id, 0) + +#define _XCR_XFEATURE_ENABLED_MASK 0 + +/* Reads the content of an extended control register.
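+ The register index is passed in ECX and the 64-bit result comes back in EDX:EAX, which is how the inline asm below wires up its operands.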
+ https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family +*/ +static inline uint64_t +_xgetbv(uint32_t xcr) { + uint32_t eax, edx; + __asm__ __volatile__ ( + /* "xgetbv" + This is specified as raw instruction bytes due to some older compilers + having issues with the mnemonic form. + */ + ".byte 0x0f, 0x01, 0xd0": + "=a" (eax), + "=d" (edx) : + "c" (xcr) + ); + return ((uint64_t)edx << 32) | eax; +} + +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +#ifndef _XCR_XFEATURE_ENABLED_MASK +#define _XCR_XFEATURE_ENABLED_MASK 0x0 +#endif + +static blosc_cpu_features blosc_get_cpu_features(void) { + blosc_cpu_features result = BLOSC_HAVE_NOTHING; + int32_t max_basic_function_id; + /* Holds the values of eax, ebx, ecx, edx set by the `cpuid` instruction */ + int32_t cpu_info[4]; + int sse2_available; + int sse3_available; + int ssse3_available; + int sse41_available; + int sse42_available; + int xsave_available; + int xsave_enabled_by_os; + int avx2_available = 0; + int avx512bw_available = 0; + int xmm_state_enabled = 0; + int ymm_state_enabled = 0; + int zmm_state_enabled = 0; + uint64_t xcr0_contents; + char* envvar; + + /* Get the number of basic functions available. */ + __cpuid(cpu_info, 0); + max_basic_function_id = cpu_info[0]; + + /* Check for SSE-based features and required OS support */ + __cpuid(cpu_info, 1); + sse2_available = (cpu_info[3] & (1 << 26)) != 0; + sse3_available = (cpu_info[2] & (1 << 0)) != 0; + ssse3_available = (cpu_info[2] & (1 << 9)) != 0; + sse41_available = (cpu_info[2] & (1 << 19)) != 0; + sse42_available = (cpu_info[2] & (1 << 20)) != 0; + + xsave_available = (cpu_info[2] & (1 << 26)) != 0; + xsave_enabled_by_os = (cpu_info[2] & (1 << 27)) != 0; + + /* Check for AVX-based features, if the processor supports extended features. */ + if (max_basic_function_id >= 7) { + __cpuid(cpu_info, 7); + avx2_available = (cpu_info[1] & (1 << 5)) != 0; + avx512bw_available = (cpu_info[1] & (1 << 30)) != 0; + } + + /* Even if certain features are supported by the CPU, they may not be supported + by the OS (in which case using them would crash the process or system). + If xsave is available and enabled by the OS, check the contents of the + extended control register XCR0 to see if the CPU features are enabled. */ +#if defined(_XCR_XFEATURE_ENABLED_MASK) + if (xsave_available && xsave_enabled_by_os && ( + sse2_available || sse3_available || ssse3_available + || sse41_available || sse42_available + || avx2_available || avx512bw_available)) { + /* Determine which register states can be restored by the OS. */ + xcr0_contents = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + + xmm_state_enabled = (xcr0_contents & (1UL << 1)) != 0; + ymm_state_enabled = (xcr0_contents & (1UL << 2)) != 0; + + /* Require support for both the upper 256-bits of zmm0-zmm15 to be + restored as well as all of zmm16-zmm31 and the opmask registers + (XCR0 bits 5-7). */ + zmm_state_enabled = (xcr0_contents & 0xE0) == 0xE0; + } +#endif /* defined(_XCR_XFEATURE_ENABLED_MASK) */ + + envvar = getenv("BLOSC_PRINT_SHUFFLE_ACCEL"); + if (envvar != NULL) { + printf("Shuffle CPU Information:\n"); + printf("SSE2 available: %s\n", sse2_available ? "True" : "False"); + printf("SSE3 available: %s\n", sse3_available ? "True" : "False"); + printf("SSSE3 available: %s\n", ssse3_available ? "True" : "False"); + printf("SSE4.1 available: %s\n", sse41_available ? "True" : "False"); + printf("SSE4.2 available: %s\n", sse42_available ?
"True" : "False"); + printf("AVX2 available: %s\n", avx2_available ? "True" : "False"); + printf("AVX512BW available: %s\n", avx512bw_available ? "True" : "False"); + printf("XSAVE available: %s\n", xsave_available ? "True" : "False"); + printf("XSAVE enabled: %s\n", xsave_enabled_by_os ? "True" : "False"); + printf("XMM state enabled: %s\n", xmm_state_enabled ? "True" : "False"); + printf("YMM state enabled: %s\n", ymm_state_enabled ? "True" : "False"); + printf("ZMM state enabled: %s\n", zmm_state_enabled ? "True" : "False"); + } + + /* Using the gathered CPU information, determine which implementation to use. */ + /* technically could fail on sse2 cpu on os without xmm support, but that + * shouldn't exist anymore */ + if (sse2_available) { + result |= BLOSC_HAVE_SSE2; + } + if (xmm_state_enabled && ymm_state_enabled && avx2_available) { + result |= BLOSC_HAVE_AVX2; + } + return result; +} +#endif + +#else /* No hardware acceleration supported for the target architecture. */ + #if defined(_MSC_VER) + #pragma message("Hardware-acceleration detection not implemented for the target architecture. Only the generic shuffle/unshuffle routines will be available.") + #else + #warning Hardware-acceleration detection not implemented for the target architecture. Only the generic shuffle/unshuffle routines will be available. + #endif + +static blosc_cpu_features blosc_get_cpu_features(void) { + return BLOSC_HAVE_NOTHING; +} + +#endif + +static shuffle_implementation_t get_shuffle_implementation(void) { + blosc_cpu_features cpu_features = blosc_get_cpu_features(); + shuffle_implementation_t impl_generic; + +#if defined(SHUFFLE_AVX2_ENABLED) + if (cpu_features & BLOSC_HAVE_AVX2) { + shuffle_implementation_t impl_avx2; + impl_avx2.name = "avx2"; + impl_avx2.shuffle = (shuffle_func)shuffle_avx2; + impl_avx2.unshuffle = (unshuffle_func)unshuffle_avx2; + impl_avx2.bitshuffle = (bitshuffle_func)bshuf_trans_bit_elem_avx2; + impl_avx2.bitunshuffle = (bitunshuffle_func)bshuf_untrans_bit_elem_avx2; + return impl_avx2; + } +#endif /* defined(SHUFFLE_AVX2_ENABLED) */ + +#if defined(SHUFFLE_SSE2_ENABLED) + if (cpu_features & BLOSC_HAVE_SSE2) { + shuffle_implementation_t impl_sse2; + impl_sse2.name = "sse2"; + impl_sse2.shuffle = (shuffle_func)shuffle_sse2; + impl_sse2.unshuffle = (unshuffle_func)unshuffle_sse2; + impl_sse2.bitshuffle = (bitshuffle_func)bshuf_trans_bit_elem_sse2; + impl_sse2.bitunshuffle = (bitunshuffle_func)bshuf_untrans_bit_elem_sse2; + return impl_sse2; + } +#endif /* defined(SHUFFLE_SSE2_ENABLED) */ + + /* Processor doesn't support any of the hardware-accelerated implementations, + so use the generic implementation. */ + impl_generic.name = "generic"; + impl_generic.shuffle = (shuffle_func)shuffle_generic; + impl_generic.unshuffle = (unshuffle_func)unshuffle_generic; + impl_generic.bitshuffle = (bitshuffle_func)bshuf_trans_bit_elem_scal; + impl_generic.bitunshuffle = (bitunshuffle_func)bshuf_untrans_bit_elem_scal; + return impl_generic; +} + + +/* Flag indicating whether the implementation has been initialized. + Zero means it hasn't been initialized, non-zero means it has. */ +static int32_t implementation_initialized; + +/* The dynamically-chosen shuffle/unshuffle implementation. + This is only safe to use once `implementation_initialized` is set. */ +static shuffle_implementation_t host_implementation; + +/* Initialize the shuffle implementation, if necessary. 
*/ +#if defined(__GNUC__) || defined(__clang__) +__attribute__((always_inline)) +#endif +static +#if defined(_MSC_VER) +__forceinline +#else +inline +#endif +void init_shuffle_implementation(void) { + /* Initialization could (in rare cases) take place concurrently on + multiple threads, but it shouldn't matter because the + initialization should return the same result on each thread (so + the implementation will be the same). Since that's the case we + can avoid complicated synchronization here and get a small + performance benefit because we don't need to perform a volatile + load on the initialization variable each time this function is + called. */ +#if defined(__GNUC__) || defined(__clang__) + if (__builtin_expect(!implementation_initialized, 0)) { +#else + if (!implementation_initialized) { +#endif + /* Initialize the implementation. */ + host_implementation = get_shuffle_implementation(); + + /* Set the flag indicating the implementation has been initialized. */ + implementation_initialized = 1; + } +} + +/* Shuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +void +shuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest) { + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + /* The implementation is initialized. + Dispatch to its shuffle routine. */ + (host_implementation.shuffle)(bytesoftype, blocksize, _src, _dest); +} + +/* Unshuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +void +unshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest) { + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + /* The implementation is initialized. + Dispatch to its unshuffle routine. */ + (host_implementation.unshuffle)(bytesoftype, blocksize, _src, _dest); +} + +/* Bit-shuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +int +bitshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp) { + int size = blocksize / bytesoftype; + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + if ((size % 8) == 0) + /* The number of elems is a multiple of 8, which is supported by + bitshuffle. */ + return (int)(host_implementation.bitshuffle)((void*)_src, (void*)_dest, + blocksize / bytesoftype, + bytesoftype, (void*)_tmp); + else + memcpy((void*)_dest, (void*)_src, blocksize); + return size; +} + +/* Bit-unshuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +int +bitunshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp) { + int size = blocksize / bytesoftype; + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + if ((size % 8) == 0) + /* The number of elems is a multiple of 8, which is supported by + bitshuffle.
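+ (Eight elements contribute one full byte per bit position, so the bit-planes pack evenly; for other sizes the buffer is passed through unchanged via the memcpy branch below.)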
*/ + return (int)(host_implementation.bitunshuffle)((void*)_src, (void*)_dest, + blocksize / bytesoftype, + bytesoftype, (void*)_tmp); + else + memcpy((void*)_dest, (void*)_src, blocksize); + return size; +} diff --git a/c-blosc/blosc/shuffle.h b/c-blosc/blosc/shuffle.h new file mode 100644 index 0000000..8b3823d --- /dev/null +++ b/c-blosc/blosc/shuffle.h @@ -0,0 +1,67 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSES/BLOSC.txt for details about copyright and rights to use. +**********************************************************************/ + +/* Shuffle/unshuffle routines which dynamically dispatch to hardware- + accelerated routines based on the processor's architecture. + Consumers should almost always prefer to call these routines instead + of directly calling one of the hardware-accelerated routines, since + these are cross-platform and future-proof. */ + +#ifndef SHUFFLE_H +#define SHUFFLE_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + Primary shuffle and bitshuffle routines. + These functions dynamically dispatch to the appropriate hardware-accelerated + routine based on the host processor's architecture. If the host processor is + not supported by any of the hardware-accelerated routines, the generic + (non-accelerated) implementation is used instead. + Consumers should almost always prefer to call these routines instead of directly + calling the hardware-accelerated routines because this method is both cross- + platform and future-proof. +*/ +BLOSC_NO_EXPORT void +shuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest); + +BLOSC_NO_EXPORT int +bitshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp); + +/** + Primary unshuffle and bitunshuffle routines. + These functions dynamically dispatch to the appropriate hardware-accelerated + routine based on the host processor's architecture. If the host processor is + not supported by any of the hardware-accelerated routines, the generic + (non-accelerated) implementation is used instead. + Consumers should almost always prefer to call these routines instead of directly + calling the hardware-accelerated routines because this method is both cross- + platform and future-proof. +*/ +BLOSC_NO_EXPORT void +unshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest); + + +BLOSC_NO_EXPORT int +bitunshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_H */ diff --git a/c-blosc/blosc/win32/pthread.c b/c-blosc/blosc/win32/pthread.c new file mode 100644 index 0000000..28c81e0 --- /dev/null +++ b/c-blosc/blosc/win32/pthread.c @@ -0,0 +1,218 @@ +/* + * Code for simulating the pthreads API on Windows. This is Git-specific, + * but it is enough for Numexpr's needs too. + * + * Copyright (C) 2009 Andrzej K.
Haczewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * DISCLAIMER: The implementation is Git-specific, it is a subset of the original + * Pthreads API, without lots of other features that Git doesn't use. + * Git also makes sure that the passed arguments are valid, so there's + * no need for double-checking. + */ + +#include "pthread.h" + +#include <errno.h> +#include <limits.h> +#include <process.h> +#include <stdio.h> +#include <stdlib.h> + + +void die(const char *err, ...) +{ + printf("%s", err); + exit(-1); +} + +static unsigned __stdcall win32_start_routine(void *arg) +{ + pthread_t *thread = (pthread_t*)arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int pthread_create(pthread_t *thread, const void *unused, + void *(*start_routine)(void*), void *arg) +{ + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) + _beginthreadex(NULL, 0, win32_start_routine, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int win32_pthread_join(pthread_t *thread, void **value_ptr) +{ + DWORD result = WaitForSingleObject(thread->handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) + *value_ptr = thread->arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +int pthread_cond_init(pthread_cond_t *cond, const void *unused) +{ + cond->waiters = 0; + cond->was_broadcast = 0; + InitializeCriticalSection(&cond->waiters_lock); + + cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); + if (!cond->sema) + die("CreateSemaphore() failed"); + + cond->continue_broadcast = CreateEvent(NULL, /* security */ + FALSE, /* auto-reset */ + FALSE, /* not signaled */ + NULL); /* name */ + if (!cond->continue_broadcast) + die("CreateEvent() failed"); + + return 0; +} + +int pthread_cond_destroy(pthread_cond_t *cond) +{ + CloseHandle(cond->sema); + CloseHandle(cond->continue_broadcast); + DeleteCriticalSection(&cond->waiters_lock); + return 0; +} + +int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex) +{ + int last_waiter; + + EnterCriticalSection(&cond->waiters_lock); + cond->waiters++; + LeaveCriticalSection(&cond->waiters_lock); + + /* + * Unlock external mutex and wait for signal. + * NOTE: we've held mutex locked long enough to increment + * waiters count above, so there's no problem with + * leaving mutex unlocked before we wait on semaphore.
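+ * A signal that arrives in the gap merely increments the semaphore count, so the wakeup cannot be lost: the WaitForSingleObject below will consume it.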
+ */ + LeaveCriticalSection(mutex); + + /* let's wait - ignore return value */ + WaitForSingleObject(cond->sema, INFINITE); + + /* + * Decrease waiters count. If we are the last waiter, then we must + * notify the broadcasting thread that it can continue. + * But if we continued due to cond_signal, we do not have to do that + * because the signaling thread knows that only one waiter continued. + */ + EnterCriticalSection(&cond->waiters_lock); + cond->waiters--; + last_waiter = cond->was_broadcast && cond->waiters == 0; + LeaveCriticalSection(&cond->waiters_lock); + + if (last_waiter) { + /* + * cond_broadcast was issued while mutex was held. This means + * that all other waiters have continued, but are contending + * for the mutex at the end of this function because the + * broadcasting thread has not left cond_broadcast yet. + * (This is so that it can be sure that each waiter has + * consumed exactly one slice of the semaphore.) + * The last waiter must tell the broadcasting thread that it + * can go on. + */ + SetEvent(cond->continue_broadcast); + /* + * Now we go on to contend with all other waiters for + * the mutex. Into battle! + */ + } + /* lock external mutex again */ + EnterCriticalSection(mutex); + + return 0; +} + +/* + * IMPORTANT: This implementation requires that pthread_cond_signal + * is called while the mutex is held that is used in the corresponding + * pthread_cond_wait calls! + */ +int pthread_cond_signal(pthread_cond_t *cond) +{ + int have_waiters; + + EnterCriticalSection(&cond->waiters_lock); + have_waiters = cond->waiters > 0; + LeaveCriticalSection(&cond->waiters_lock); + + /* + * Signal only when there are waiters + */ + if (have_waiters) + return ReleaseSemaphore(cond->sema, 1, NULL) ? + 0 : GetLastError(); + else + return 0; +} + +/* + * DOUBLY IMPORTANT: This implementation requires that pthread_cond_broadcast + * is called while the mutex is held that is used in the corresponding + * pthread_cond_wait calls! + */ +int pthread_cond_broadcast(pthread_cond_t *cond) +{ + EnterCriticalSection(&cond->waiters_lock); + + if ((cond->was_broadcast = cond->waiters > 0)) { + /* wake up all waiters */ + ReleaseSemaphore(cond->sema, cond->waiters, NULL); + LeaveCriticalSection(&cond->waiters_lock); + /* + * At this point all waiters continue. Each one takes its + * slice of the semaphore. Now it's our turn to wait: Since + * the external mutex is held, no thread can leave cond_wait + * yet. For this reason, we can be sure that no thread gets + * a chance to eat *more* than one slice. OTOH, it means + * that the last waiter must send us a wake-up. + */ + WaitForSingleObject(cond->continue_broadcast, INFINITE); + /* + * Since the external mutex is held, no thread can enter + * cond_wait, and, hence, it is safe to reset this flag + * without cond->waiters_lock held. + */ + cond->was_broadcast = 0; + } else { + LeaveCriticalSection(&cond->waiters_lock); + } + return 0; +} diff --git a/c-blosc/blosc/win32/pthread.h b/c-blosc/blosc/win32/pthread.h new file mode 100644 index 0000000..a95f90e --- /dev/null +++ b/c-blosc/blosc/win32/pthread.h @@ -0,0 +1,92 @@ +/* + * Code for simulating the pthreads API on Windows. This is Git-specific, + * but it is enough for Numexpr's needs too. + * + * Copyright (C) 2009 Andrzej K.
Haczewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * DISCLAIMER: The implementation is Git-specific, it is a subset of the original + * Pthreads API, without lots of other features that Git doesn't use. + * Git also makes sure that the passed arguments are valid, so there's + * no need for double-checking. + */ + +#ifndef PTHREAD_H +#define PTHREAD_H + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#include <windows.h> + +/* + * Defines that adapt Windows API threads to pthreads API + */ +#define pthread_mutex_t CRITICAL_SECTION + +#define pthread_mutex_init(a,b) InitializeCriticalSection((a)) +#define pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define pthread_mutex_lock EnterCriticalSection +#define pthread_mutex_unlock LeaveCriticalSection + +/* + * Implement a simple condition variable for Windows threads, based on the + * ACE implementation. + * + * See original implementation: http://bit.ly/1vkDjo + * ACE homepage: http://www.cse.wustl.edu/~schmidt/ACE.html + * See also: http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +typedef struct { + LONG waiters; + int was_broadcast; + CRITICAL_SECTION waiters_lock; + HANDLE sema; + HANDLE continue_broadcast; +} pthread_cond_t; + +extern int pthread_cond_init(pthread_cond_t *cond, const void *unused); +extern int pthread_cond_destroy(pthread_cond_t *cond); +extern int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex); +extern int pthread_cond_signal(pthread_cond_t *cond); +extern int pthread_cond_broadcast(pthread_cond_t *cond); + +/* + * Simple thread creation implementation using the pthread API + */ +typedef struct { + HANDLE handle; + void *(*start_routine)(void*); + void *arg; +} pthread_t; + +extern int pthread_create(pthread_t *thread, const void *unused, + void *(*start_routine)(void*), void *arg); + +/* + * To avoid the need to copy a struct, we use a small macro wrapper to pass + * a pointer to win32_pthread_join instead.
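+ * (POSIX passes pthread_t by value; ours is a struct, so the macro takes its address.)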
+ */ +#define pthread_join(a, b) win32_pthread_join(&(a), (b)) + +extern int win32_pthread_join(pthread_t *thread, void **value_ptr); + +#endif /* PTHREAD_H */ diff --git a/c-blosc/blosc/win32/stdint-windows.h b/c-blosc/blosc/win32/stdint-windows.h new file mode 100644 index 0000000..4fe0ef9 --- /dev/null +++ b/c-blosc/blosc/win32/stdint-windows.h @@ -0,0 +1,259 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#if _MSC_VER >= 1600 // [ +#include <stdint.h> +#else // ] _MSC_VER >= 1600 [ + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or the compiler gives many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 don't +// realize that, e.g. char has the same size as __int8, +// so we give up on __intX for them.
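+// On the 32-bit Windows targets this header supports, the plain C types below have the required widths anyway (char = 1, short = 2, int = 4 bytes).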
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer 
types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Macros for integer constants + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with <stdint.h>. +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#endif // _MSC_VER >= 1600 ] + +#endif // _MSC_STDINT_H_ ] diff --git a/c-blosc/build.py b/c-blosc/build.py new file mode 100644 index 0000000..3e73da0 --- /dev/null +++ b/c-blosc/build.py @@ -0,0 +1,10 @@ +from conan.packager import ConanMultiPackager +import os + +if __name__ == "__main__": + version = os.getenv("TRAVIS_TAG") or os.getenv("APPVEYOR_REPO_TAG_NAME") or "dev" + reference = "c-blosc/%s" % version + upload = os.getenv("CONAN_UPLOAD") if (version != "dev") else False + builder = ConanMultiPackager(reference=reference, upload=upload) + builder.add_common_builds(shared_option_name="c-blosc:shared") + builder.run() diff --git a/c-blosc/cmake/FindLZ4.cmake b/c-blosc/cmake/FindLZ4.cmake new file mode 100644 index 0000000..e581a80 --- /dev/null +++ b/c-blosc/cmake/FindLZ4.cmake @@ -0,0 +1,10 @@ +find_path(LZ4_INCLUDE_DIR lz4.h) + +find_library(LZ4_LIBRARY NAMES lz4) + +if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY) + set(LZ4_FOUND TRUE) + message(STATUS "Found LZ4 library: ${LZ4_LIBRARY}") +else () + message(STATUS "No LZ4 library found. Using internal sources.") +endif () diff --git a/c-blosc/cmake/FindSnappy.cmake b/c-blosc/cmake/FindSnappy.cmake new file mode 100644 index 0000000..688d4d5 --- /dev/null +++ b/c-blosc/cmake/FindSnappy.cmake @@ -0,0 +1,10 @@ +find_path(SNAPPY_INCLUDE_DIR snappy-c.h) + +find_library(SNAPPY_LIBRARY NAMES snappy) + +if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) + set(SNAPPY_FOUND TRUE) + message(STATUS "Found SNAPPY library: ${SNAPPY_LIBRARY}") +else () + message(STATUS "No snappy found. Using internal sources.") +endif () diff --git a/c-blosc/cmake/FindZstd.cmake b/c-blosc/cmake/FindZstd.cmake new file mode 100644 index 0000000..7db4bb9 --- /dev/null +++ b/c-blosc/cmake/FindZstd.cmake @@ -0,0 +1,10 @@ +find_path(ZSTD_INCLUDE_DIR zstd.h) + +find_library(ZSTD_LIBRARY NAMES zstd) + +if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) + set(ZSTD_FOUND TRUE) + message(STATUS "Found Zstd library: ${ZSTD_LIBRARY}") +else () + message(STATUS "No Zstd library found.
Using internal sources.") +endif () diff --git a/c-blosc/cmake_uninstall.cmake.in b/c-blosc/cmake_uninstall.cmake.in new file mode 100644 index 0000000..c6d8094 --- /dev/null +++ b/c-blosc/cmake_uninstall.cmake.in @@ -0,0 +1,22 @@ +if (NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") + message(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") +endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") + +file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) +string(REGEX REPLACE "\n" ";" files "${files}") +list(REVERSE files) +foreach (file ${files}) + message(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"") + if (EXISTS "$ENV{DESTDIR}${file}") + execute_process( + COMMAND @CMAKE_COMMAND@ -E remove "$ENV{DESTDIR}${file}" + OUTPUT_VARIABLE rm_out + RESULT_VARIABLE rm_retval + ) + if(NOT ${rm_retval} EQUAL 0) + message(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"") + endif (NOT ${rm_retval} EQUAL 0) + else (EXISTS "$ENV{DESTDIR}${file}") + message(STATUS "File \"$ENV{DESTDIR}${file}\" does not exist.") + endif (EXISTS "$ENV{DESTDIR}${file}") +endforeach(file) diff --git a/c-blosc/compat/CMakeLists.txt b/c-blosc/compat/CMakeLists.txt new file mode 100644 index 0000000..6db3e88 --- /dev/null +++ b/c-blosc/compat/CMakeLists.txt @@ -0,0 +1,35 @@ +# build the filegen utility +link_directories(${PROJECT_BINARY_DIR}/blosc) +add_executable(filegen filegen.c) +# have to copy blosc dlls on Windows +if (MSVC) + add_custom_command( + TARGET filegen + POST_BUILD + COMMAND ${CMAKE_COMMAND} + ARGS -E copy_if_different + "${PROJECT_BINARY_DIR}/blosc/\$\(Configuration\)/blosc.dll" + "${CMAKE_CURRENT_BINARY_DIR}/\$\(Configuration\)/blosc.dll") +elseif (MINGW) + add_custom_command( + TARGET filegen + POST_BUILD + COMMAND ${CMAKE_COMMAND} + ARGS -E copy_if_different + "${PROJECT_BINARY_DIR}/blosc/libblosc.dll" + "${CMAKE_CURRENT_BINARY_DIR}/libblosc.dll") +endif(MSVC) +target_link_libraries(filegen blosc_shared) +add_dependencies(filegen blosc_shared) + +# tests +if (BUILD_TESTS) + option(TEST_INCLUDE_COMPAT "Include compat checks in the tests" ON) + if (TEST_INCLUDE_COMPAT) + file(GLOB DATAFILES *.cdata) + foreach(datafile ${DATAFILES}) + get_filename_component(fname ${datafile} NAME) + add_test(test_compat_${fname} filegen decompress ${datafile}) + endforeach(datafile) + endif() +endif (BUILD_TESTS) diff --git a/c-blosc/compat/README.rst b/c-blosc/compat/README.rst new file mode 100644 index 0000000..9845513 --- /dev/null +++ b/c-blosc/compat/README.rst @@ -0,0 +1,4 @@ +Compressed datafiles for testing backward/forward compatibility +=============================================================== + +The files here have been created with different versions of the C-Blosc library and are meant to test backward/forward compatibility among different versions of the library. diff --git a/c-blosc/compat/blosc-1.11.1-blosclz.cdata b/c-blosc/compat/blosc-1.11.1-blosclz.cdata new file mode 100644 index 0000000000000000000000000000000000000000..6c58ee65a59fa8641688ed8de1aeee9551da956f GIT binary patch literal 26736 zcmeI)Ra6yQ9Dwo7;m{@67)MNO3@{PJPHZu-0~-|;u)DjvySux)yRbX3^K$3A-sM`n zvtV5F-w(fao|t|1+Gl1y4vfjl%B&pxl~VR9EL15U?NyYEQina2a`jd!*jK5Afl9p! 
diff --git a/c-blosc/compat/blosc-1.11.1-blosclz.cdata b/c-blosc/compat/blosc-1.11.1-blosclz.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..6c58ee65a59fa8641688ed8de1aeee9551da956f
GIT binary patch
literal 26736
[base85 payload omitted]

diff --git a/c-blosc/compat/blosc-1.11.1-lz4.cdata b/c-blosc/compat/blosc-1.11.1-lz4.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..b961fa5b4d41bcf6e646d2cca2b33f109d94372a
GIT binary patch
literal 33610
[base85 payload omitted]

diff --git a/c-blosc/compat/blosc-1.11.1-snappy.cdata b/c-blosc/compat/blosc-1.11.1-snappy.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..68f3075bdc9c00e5fe86937888b2c1f281449447
GIT binary patch
literal 199683
[base85 payload omitted]

diff --git a/c-blosc/compat/blosc-1.11.1-zlib.cdata b/c-blosc/compat/blosc-1.11.1-zlib.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..0a74e50da9fa231bebd93d797616d2d68f877b29
GIT binary patch
literal 16176
[base85 payload omitted]

diff --git a/c-blosc/compat/blosc-1.14.0-lz4hc.cdata b/c-blosc/compat/blosc-1.14.0-lz4hc.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..7d32f90941966fe48bf2e93d0329ebfedd565355
GIT binary patch
literal 36263
[base85 payload omitted]

diff --git a/c-blosc/compat/blosc-1.14.0-zstd.cdata b/c-blosc/compat/blosc-1.14.0-zstd.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..0fbd05e588204edccadb3313fa8041da9b90cce8
GIT binary patch
literal 3569
[base85 payload omitted]

diff --git a/c-blosc/compat/blosc-1.3.0-lz4hc.cdata b/c-blosc/compat/blosc-1.3.0-lz4hc.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..cc539931e9cb421a29f2ba3d56ffc96c1cd3d202
GIT binary patch
literal 31963
[base85 payload omitted]

diff --git a/c-blosc/compat/blosc-1.7.0-lz4hc.cdata b/c-blosc/compat/blosc-1.7.0-lz4hc.cdata
new file mode 100644
index 0000000000000000000000000000000000000000..cc539931e9cb421a29f2ba3d56ffc96c1cd3d202
GIT binary patch
literal 31963
[base85 payload omitted]

[base85 payloads abbreviated throughout; the interleaved binary patches for the remaining c-blosc/compat/*.cdata files had their headers garbled in extraction and are likewise omitted]
zOq4iD(qzd~q)e4MP1mK4=Go_-f8oWKUVi1(*Is|)&9~lu=iT@I z_5KGRe)RDtpXU1P^Dn;4o#(5szxnpN?|=C5r=NfM^|#;i{_*Eu`SKSiSg3H3qQ#1r zC|Rm>nX=`|SEyL2a+Rvps@JGlt9G5b_3Afh*r;)nrp=m1wrJU^b(^;B+IQ&KsdJaE z-MaVa*{gS-zWw?S7&vI~kfFndM~xUcYV??~zIBD{fsne#maka3YW146>(+1BxM}m2t=qQm*tu)>p1u3_A2@jE@R6g(j-NPr>hzhj=gwca zc-L?y_wGM<_~`MI*ib}la0Cei2?PlQ2?PlQ2|T3<#N_>ZIuQSr zP$*|8A{3o70&x%*@em&gkPwNG7)g*6$&ef=kP@ko8flOg>5v{7kP(@X8Cj4O*^nJM zkQ2|~Sv-g5@d94NOL!Tt;8nba*YO74#9Me9@8Dg$hkxOHe1H$}5kAHz_!PPD89v7s z_!7C12Vdc9e1mWC9lpm8_z^$hXZ(U+@f&_eUi^VS@fY$TKMJ5A3ZXEHpeTx=I7*-- zN})8$pe)LvJSw0fDxor}pem}NI%=RMYN0mjpf2j6J{q7Q8lf?opedT6IU>;lEzt_C z(FSeN4(-ta9nlG$(FI-64c*ZLJ<$uj(Fc9e5B)I!12G7LF$6;~48swH5g3V47>zL) zi*Xo_37CjUn2afyifNdR8JLM#n2kA@i+Pxj1z3nhSd1lDie*@i6PP?&KW}Zvr)JPA}-<~J`x}y z5+N~?ASsd|Ia1*7=S$^LYNSD0q(gdSKt^OjW@JHDWJ7l3Ku$b^XYm}K#|wB7FX3gp zf>-exUdJ1F6K~;dyn}b~9{z>*@c}->NB9_@;8WznXZRdn;7jC29(;wb@eRJkclaJZ z;79y~pYaQR#c%i>dGQDS#9zpV{3w8eD1^c&f}$vf;wXWVD237}gR&@x@~D7{sD#R> zf~u&7>ZpO5sD;|7gSx1P`e=ZLXoSXSf~IJO=7>ZKv_vbkMjNz6JG4g!bVMg~Mi+EN zH*`l2^h7W8Mj!M=KlH}{48$M|#t;m}Fbqc&MqngHVKl~IEXH9xCSW2aVKSy*DyCsN zW?&{}VK(MqF6LoA7GNP3VKJ6qDVAY5R$wJoVKvrZE!JT@Hee$*VKcU1E4E=fc3>xV zVK??*FZN+S4&WdT;V_QiD30McPT(X?;WWbxQK`NNPvV$gv3aKq)3M3NP(0{h15ucv`B~a$bgK< zgv`i-tjLD!$bs+)2K7${`H&w4P!NSs7)4MN#ZVk2P!gq38f8!xva@jK>5_#3W3{6imf5Oven& z#4OCl9L&W$%*O&O#3C%l5-i0sEXNA0#44=D8mz@Stj7jy#3pRU7Hq{fY{w4l#4hZ{ z9_+~qXH_T5-Ot#s-haI zqXufC7HXpo>Y^U%qX8PC5gMZjnxYw+BN8pp60Oi0ZO|6&&>kJo5uMN(UCcO{ z6TQ$Ieb5*E&>sUZ5Q8unLogJ>FdR`Bfsq)6(HMiV7>DtgfQgud$(Vwvn1<Q~(IEVANfQz_<%ZSDm#NaBf;W}>MCT`(2?%*!& z;XWSVAs*o|o*)(>L?900A|B!+0TLn+5+ezcA{mk+1yUjvQX>u0A|28r12Q5LG9wGJ zA{(+J2f`;9)IS;ILw*!MK@>t^6hToGLvfTqNt8lqltEdPLwQs{MN~p%R6$i#Lv_?Z zP1Hhd)InX;Lwz(rLo`BTG(l4|Lvuu;1zMsNTB8lxq8-|!13ID;I-?7^q8qxS2YR9x zdZQ2eq96KW00v?Z24e_@Vi<-a3L`KQqc9p{Fc#x59uqJTlQ0=mFcs4<9WyW!voITT zFcn+{PW;#Xa1| z13bhdJjN5mB7_LUL0rT`d?Y|ZBtl{&K~f|`a-={?q(W+>L0Y6kdSpOGWI|?SK~`i# zcH}_#1cUk~gM7%30w{<=D2yT~iee~^5-5pMD2*~Gi*hKB3aE%msEjJ8ifX8i8mNg{ zsEs)aV-40~9oAz5HewStV+*!o8@6Kyc48NHV-NOXANJz_ z4&o3F;|Px87>?rvPT~|!;|$K?9M0ncF5(g{BN|r_gR8iP>$riNxP{xegS)tg`*?td zc!bAzf>?wQfjEeZc!-Y#NQgv8j3h{kWJrz_NQqQPjWkG$bV!d3$cRkHj4a5CY{-rr z2%lh3|74I4`B4A`Q3!=m1VvE{#Zdw!Q3|C|24ztWo_0a$g(Fl#v1WnNl%@K(fXo*&6jW%eDc4&_d=!j0}j4tSkZs?94=!stFjXvm$ ze&~+@7>Gd_j3F3`VHl1mjKD~Y!f1@aSd7DXOu$4;!emUrR7}Hk%)m^{!fedJT+G9K zEWko6!eT7JQY^!AtiVdF!fLF+TCBr*Y`{ir!e(s2R&2v|?7&X!!fx!rUhKnu9Kb;w z!eJc2Q5?f@oWMz(!fBkrS)9XpT);(K!evC`3Sw{-*Ki#-a1*z18+ULQ_i!H%@DPvi z7*7z35F!uX^{@;kpUTz37L@vS&Y+XwpdlKeF`A$$nxQ!&(E=^e3a!xwZP5@~6PCTNOgXpTsnV#$p`CV*(~(5+-8`reYeVV+Lko z7G`4(=3*Y^V*wUo5f)?&yJ@2v(;At5b}F|9f?c z--f4;LH(1#`v~e^usY8;7>419!UzPb^Zse8^Sr7-{R_HN83z?n36)U=RZ$JqQ3Ewm z3$;-Pbx{xX(Ett62#wJMP0HJ)0(RXO1+9ms_Vv{4g&~qR53biH6UnP4vs2HZi+Z+Qg2zX%kmnNSjF9lt@&n zboH@o(j?NPz3S>~{&4Ldue<(+8*lp4pa1gLzukPxt+(BN$KU^P=UsRI^PYR}yZ>MR ze&E4}9)9G}#~y#;$)}!v=Go_-f8oWKUVi1(*Is|)&9~lu=iT?-|KP)qKK|s>&p!X+ z%dfuv=G*VS|KZ1BCIvQ_IgZQHf)(6LkJE?v8I@6oeY z?>>F|^&c>B(BL6MhYcSwa@6QCW5^XDi&0nx^(c&dbmn~nh za@FcJYuBycuyNDoEnBy3-?4Mo?mc_=?LTnv(BUITj~zd8^3>@wXV0Bay71rYPfDbT zuP6`&qCgag0#P6eM1d#}1yX8(9f>qo{pml?Ko=jH()`b_GieeRp93yeT67h<8eN0_ zfUZSY{q6K5Bp(qDH7OYJ!@g zW~e!8fm)(gs5NSX+M;%-J?eluqE4tY>VmqWZm2uzfqJ4|s5k0^`l5cQKN^4rqCsdd z8iIzRVQ4rSfkvWHXfzsw#-ed(Jeq(eqDg2nnu4aHX=pl{fo7svXf~RI=AwCMK3ael zqD5#iT7s6MWoS8CfmWhbXf;}c)}nQ2J=%aaqD^Qs+Jd&CZD>2%fp(%@XgAt}_M&}g zKRSR8qC@B~I)aX(W9T?Kfli`R=rlTm&Z2YZJW4_rF4r|T{_l^0$-f#0Q6LILfhZ6K zqCgag0#P6eq{spn|LG?l1XARkjf;u`Q6LILfhZ6KqCgag0{@o+zrO<(HTnj9i@rnOqaVKl||)Hc~k*aM3qowR0UN<)lhX*1Jy*eP;FEP)kXDCebfLoM2%2m 
z)C4s}%}{gH0<}b~P;1l%wMFevd(;7SM4eD))CF}#-B5Sb1NB6`P;b-+^+o+qe>4CM zM1#;^Gz1Mr!_aUv0*yqY&}cLUjYZ?ocr*b`M3c~DGzCpX)6jG@1I9M2pa3v;-|h%g}PP0twrn5db9y;M4QlNv;}QN+t7Bj1MNh+&~CH` z?M3_0eslmGM2FB}bOaqm$Ix+f0-Z#s&}noAoki!+d6a}MT&_#+#HC3-83$1y3Pgb@ z5Cx(@6o>**APS`H0x^zB+4ncDE(%0}C=dmrKop1qQ6LIju|SMt;(v~b|0yR5M1d#} z1)@L{hyqa{3Z$k2F^-9GO!C9=?+?Tepa;=I=wb8-dK5i|9!F20C(%>rY4i+w7Cnca zM=zik(M#xM^a^?vy@p;#Z=g5PTj*`{4tf{8hu%jYpbybU=wtK=`V@VJK1W}mFVR=% zYxE8J7JY}lM?at+(NE}S^b7hG{f5$^^e6+$h%%wfC=1GpvZ3rK2g-?Zq1-4B%8T-$ z{HOpbhzg;?s0b>GilO4D1S*M2q0*=fDvQdY@~8r;h$^AVs0ylzs-fzr2C9i_q1vbp zs*CEO`ltbFh#H~Bs0nI{nxW>X1!{>}q1LDkYKz*T_NW8uh&rLps0-?fx}ol<2kMD> zq28zu>WliJ{%8Oihz6m-Xb2jLhN0nT1R9A(q0wjz8jHrE@n`~?h$f-QXbPH&rlIL* z2AYXxq1k8-nv3S4`Dg)Jh!&y6XbD=1mZ9Zn1zL$#q19*&T8q}9^=Jdyh&G|kXbakk zwxR842il2tq1|W?+Kcv~{pbKXhz_B{=m-e#z7Q_0#P6eM1d#}1)@L{hyp3QK#XHj_Wg~kivm#~3Pgb@5Cx(@6o>*>ED+GilO4D z1S*M2q0*=fDvQdY@~8r;h$^AVs0ylzs-fzr2C9i_q1vbps*CEO`ltbFh#H~Bs0nI{ znxW>X1!{>}q1LDkYKz*T_NW8uh&rLps0-?fx}ol<2kMD>q28zu>WliJ{%8Oihz6m- zXb2jLhN0nT1R9A(q0wjz8jHrE@n`~?h$f-QXbPH&rlIL*2AYXxq1k8-nv3S4`Dg)J zh!&y6XbD=1mZ9Zn1zL$#q19*&T8q}9^=Jdyh&G|kXbakkwxR842il2tq1|W?+Kcv~ z{pbKXhz_B{=m**APPi*C=dm%SRlqRF^);~Z%4*rCJID>C=dmr zKop1qQ6N=_VGN6no6UvOTpsXkx%8qiNoG2H{jq;$pC?Cp?3ZR0h5Gss{prWW4 zDvnB^lBg6ajmn_1s2nPfDxiv}5~_@W+G#o~Retrm1q@Ojn<&GXdPOQHlU4Y6WWZnpsi>d+KzUhooE-@jrO3u zXdl{-4xoeR5IT&Gprhy*I*v}DljsyWjn1I6=o~talF)_Ab!i-Pb@ItLhyqa{3Pgb@ z5Cx(@6o>**AY~VbaZJj-zj1X@APPi*C=dmrKop1qQQ(RNVjL6Wm>9=IfhZ6KqCgag z0#P6eM1j;)AjUBnv;Zwci_l`U1T96&&~mf_twgKPYP1HeMeERdv;l2Io6u&o z1#Ly!&~~%~?L@oKZnOvOMf=cxbO0ShhtOek1RX`k&~bDEokXY5X>US?(GIi|?LxcJ9<&$jL;KMIbPydvhtUyq z6dgmy(Ft@CokFM48FUt%L+4Qvx^THJjbr|hd@>HAKop1qQ6LILfhZ6KqCgZ#*#%-8 zld|t`TwN510#P6eM1d#}1)@L{xMG1A$HX`$#xYSK3Pgb@5Cx(@6o>**AT9zdThTVO9qm9n(Jr(b z?Lm9dKC~YlKnKwwbQm2$N6|5K9GyTX(J6Eqok3^OIdmQ+p$nJm(m3YY**APPi*C=dmrz!eL`I3~t1sp&aj|!lIs1PcQilCyX7%GlRppvK*DvipZvZx#?k1C*ws1mA-s-UW< z8mf+Jpqi)_s*UQPx~Lwij~bwcs1a(6nxLkr8ETGNpq8i=YK_{Uwx}Iyk2;`^s1xdp zx}dJ88|sdFpq{7~>W%uKzNjDSj|QNDXb>8VhM=Kn7#fa7ppj@48jZ%Fv1l9`k0zjr zXcC%?rl6^48k&w~pqXeEnvLe5xo94mj~1YXXc1bBmY}6*8Cs53pp|G9T8-AAwP+n$ zk2au+;sX&ZlVjPqFa2(^97{|moCLN!L z^e6+$h%%wfC=1GpvZ3rK2g-?Zq1-4B%8T-${HOpbhzg;?s0b>GilO4D1S*M2q0*=f zDvQdY@~8r;h$^AVs0ylzs-fzr2C9i_q1vbps*CEO`ltbFh#H~Bs0nI{nxW>X1!{>} zq1LDkYKz*T_NW8uh&rLps0-?fx}ol<2kMD>q28zu>WliJ{%8Oihz6m-Xb2jLhN0nT z1R9A(q0wjz8jHrE@n`~?h$f-QXbPH&rlIL*2AYXxq1k8-nv3S4`Dg)Jh!&y6XbD=1 zmZ9Zn1zL$#q19*&T8q}9^=Jdyh&G|kXbakkwxR842il2tq1|W?+Kcv~{pbKXhz_B{ z=m**APPi*C=dm%SRlqRF^)-1FG8lOABi3p8wH|36o>**APPi* zC~(CBF^-9GO!C8VjALRP6XTe4d>+!H3@9VYgfgQnC@ac_vZEX*C(4C#qdX`t%7^l! 
z0;nJ=gbJe~s3FW3aBEgges#ds4A+4s-qgHCaQ&MqdKTA zs)y>M2B;xwgc_qJs3~fOnxht|C2ECQqc*55YKPjR4yYsQggT=xs4MD*x}zSbC+dZI zqdurF>WBKH0cao^ga)G_Xeb(nhNBT^BpQWAqcLbK8i&TC31}jkgeIdYXeye9rlT2X zCYpt2qd90Unuq421!y5!gchSEXenBTmZKGDC0d16qcvzPT8Gx74QM0Ugf^osXe-)= zwxb9>T2H9!qfBh(l*K}}IJ)Eu=yEm14f8nr=fQ9INgbwC|a zC)62rL0wTd)E)IeJy9>z8}&hbQ9sll4L}3YAT$^aK||3nG#rgUBhe@{8jV3?(Ks|7 zO+XXTBs3XKK~vE*G#$-AGtn$G8_hv;(L6LCEkFy=BD5GSK}*pxv>dHKE72;n8m&QV z(K@sqZ9p5*CbSuCL0i!_v>ok0JJBw*8|^`R(LS^v9Y6=sA#@lWK}XRsbR3;PC($W% z8l6FB(K&P;C7}zK>(V&phUAlR5Cx(@6o>**APPi*C=dmrK*}x<zR-a-f_j7s`$Dpu8v_%8v@5f~XKGjEbP5s2D1a zN}!Uc6e^9%pt7hODvv6lil`E*jH;ljs2Zw{YM`2^7OIWvpt`6Ys*f6=hNuy0jGCaP zs2OUGTA-Gw6>5#zpth(TYL7agj;IsrjJlw%s2l2zdZ3=D7wV1rpuVUd>W>DXfoKpK zjE110Xc!ufMxc>s6dH}jps{Ei8jmKRiD(jXpuK1x z+K&#PgXj=CjE** zAPPi*)KnnGF)@xwemIVCOpIe<9FvaELwb|}Wki`!W|ReGMcGhxlmq2NxlnGD2jxZi zP<~VZ6-0$lVN?VaMa58YR05SmrBG>929-tSPWHbd$Mbpr9 zGy}~UX0!!u zMcdGJv;*x#yU=d52kk}s(0+6P9YlxFVRQr?MaR%_bON13r_gD12AxIc(0P=EE?lll z*TyFiR%Quh6gtBV3rAPPi*C=dmrKop1qS1b_Y zm>9>TrWYYo)sIAvi;V(NAPPi*C=dmrKoq!Qff&ccI41ewIL0wCj)`$hIzA8SQ3jL| zWkQ)z7L*lbL)lRdloRDbxltaJ7v)3wQ2|sC6+(qk5mXcvL&Z@ER1%d!rBNAF7L`Ng zQ3X^HRYH|f6;u^fL)B3YR1?)gwNV{Z7u7@cQ3KQvHA0P16VwznL(NeO)DpEqtx+4) z7PUj|Q3uo!bwZs{7t|GXL)}pi)D!hWy-^?37xhE^(Ev0M4MKy_5Hu7GL&MPsG!l(M zqtO^N7L7yW(F8ORO+u5=6f_l0L(|a=G!xB2v(X$h7tKTS(E_v(EkcXY60{U8L(9<$ zv=XgCtI-;?7Og|;(FU{;Z9<#T7PJ*@L)*~~v=i+@yU`xB7wtp)(E)T29YTlE5p)zC zL&wnxbP}CHr_mX77M(-qQ4+dvxh{=k{*-(&4x&I5hyqa{3Pgb@5Cx(@6iC?xVjPpQ z?{8dP6o>**APPi*C=dmrKoq!Qfr}3Ur*29lDmA(K*fnVqY0_SG^)-LE_K(+Hf5VM8 z{prtt`Rm_qzU9{2ZolL2|G4w6yZ?F5z4zV!uYW)A;6o2T^5|oaKk?*KPe1eQbI-r< z;!7{T^6G1^zwzc>^q8^Z z#!r|yY4Vh*)27duIcxTux%1{PSh#5MlBLU*uUNTi^_sQo)^FIjY4et?+qUo6xoh{H zz5DhbIC$vrk)y|A924W1DDZzMaPhs-L?Zb&j^AG#OoP&*tI*Zx8uSNrE&3z64qcCK zKsTbB(Es|^&fl-$&-~h7&|lHt(9P%;bSt_I-Hz@+e@Fj7ccQz{-RPg_9&|6d58aRc zh5n5mKo6pa(8K5v^eB1^J&vA0Pok&L)94xWEP4(-k6u78qLa9GrYt{bp{Oei2y+8bG{Z~}v`f(BQl@SpUQzD+Mh=|Di?d`+dh=|4U5fRUv89vz- znHd`q@y|0y8V8OXKlS_i;i)yht&R4gt1aKPjT@gilmF|hj@HicS+k#h@t;BG-I~l< zTmJUu7yEshEZO$I!_TDXo)>4L-T(Mx_Z#=TG<)02AN=^`=8s>TwRP{?-@f<#$1llL zUH`g%Su*qCH~HuHdvunWj~sgXrT5N%s(ka|x3=s#bn1}*&Fd?A&TPAQy8GO#>tFXS zd%3J(aQ~+6(~W12G@N^F;~OW|9^F1P`1W4trt#MKYBE{-&3=fsU5wvQ?}Z+=2-pk(Gh7GcVp+3y*&d* zE`9T1L#XRm_*(v=XMXtU`PY4>R~9{c@#;(Oe}7^fJoDq_J>7oWIe2#H+CF=TRzLI0 z4?n-~#>uXap1FQ;#421G-1*BdKVGrCbFTN+FV}}u5j)3fjKCOyF#=-*#t4iN_#aN- z)<>haBBuOR;VGK*yMH?G_>X_ue)xCK^nX37dv-@eJ{gH_1*3vdVr!t4v<6$nI|Fx; zJA-$M&VZ9RgHCZ;U>cbgoF?uM>?iw!`^61`4P--bgGd5|kRTCL1F0l6m@4iL>?XT| zyTykB50Qt04~bs}z9e4;zZ4e)7LWzO1>$>w_sDy}_rxuMEo4h@i+DM3nOqKD7AFKI zkO{#FqAs8#x}Z+HFK{2ZFL0#A@9f=`GS z0~g7~;6*VzkWI3K+2YHAm&wb)m&G-KHDpb2jd&(-hMWnW5i0@}q#{@$9t|8NM}tSj z=K{}>=Yr3PHv%`vjo=M2HV{iqpo4S-JH(}drDSPvspt)Oi8tsK3j>9uFjy!a z3>+i}g9pX>Ks~7s){7Sc7s!R+1u-L#K{A3F;@-esvNyO_Tpd_VRtHy$rvsELOx zEKo+uf@R{7z!7pJctqSD*iN6c;0cilFoX#*qAg$}wxCVq0vzFjoJa$d&>$7_0(m4am?w4xx=2^BORNjjk-A`= zI2aftgTXmN~pqKOpd&QIQDqO18=#%QI-!}I) z=Qi&)$2QM4*EZis--vs}IpQ60jCe*|BfiAGM0cVy(VOT<^d!0xea1ec+vqfUjSi#7 z=ra12^(}KRb1w5Pb1d^Lb1m~7?>p{3?mX^2?l|r_?mF%(?kjc|JBz)=j$%)-tJv4m z*W>PS_IP_7J)Ry{kFT+>(cS25^fo#gJ&mqL-*Dfsd)PVb9d-&W%wx^jK{`u4f^Irn+@Ire$>x%Tpbf{>p1H< z>pJVJ?5lKFIxD@Ej!I9ZtI~I@@0k0T^O*OTzHpx-wyW<=ML`<#}3a9*ACy! 
zzMJlw&YRwwj+>sFu9&_U_s!0PT?vMS)&yNbM}jG#B|)3eo?uL9OVB5DCYbfjdX0XU z-k@*Q>+~IZlfFf-)wk=7`Zm2@->Em(G}mZqcGVbaT5EJQ9W|z!mKtqMdyTQCtwvwd zS!3>R?$`A1>NoVa_UrmP`c3^U{o4Naeq(=IzrMe--z;mEX=J-(23f02C+m=zWGynS ztX*c5waN6dPMNv8xm(k{tJ~1s+O6yE=r(n?bZfiYyN%s#-TLm%ZgWF(gQj6ugQ20d zLD$gHU}|V-&^EL;7#rFe^bMU2=Aq^x&CsqP!%*vxZm46(G}JPr9cmvk4z&&GhdPJM z(q^efx=U)1wn}x<4yj4nBGpRUrABF+R4?t6nyt-NjdhpRU~RSPtQ}U9wZ*Enwp)$X zHmly+X*Jh2*J^5a)f#GBYjw39wWivZT5WB6t+BSPR$tp$YaVDG&>J)KNJy~6dE7O42=s-4owW@grY*J zp$VbvP;4k8loZMfMTSyB@u93xOej5+7|IPrhtfg`A!#TsBnc%;A|#VS`OG9{9y6Yq z$z(E%m~qSuW-_yonaIpxa+t+T6f=!UWfm|KnAuD=Q^CYCg-ixh#w0NelgCssk<3&k zg_+O9Gqad1rksgk3Yc`Jlu2ZYnOvrliDsrVX-o-|z{nUWqhR8gB1XcRrJ&9&v*s<{Ym3O9+H$BpM^a+%yBZX7p*o6IfbCUSGQ9Bwfe#ZBW< zxdq$=ZZ?#Tg36;}iI+x}%5zG`?%}!w-Whb!@u=7}+9napw&Sd}0 zX0i{ni`Zw_aqJ)28SG>1WcE+&LiTBPB6~kOhkcUGVOe%D`xiEfy_22BZeUZ{hu8(| z7Ip%AA3K|Ug3V^vuodicY%IH!EoAH240bhJ#%^bm*ySw4a%>)3$5ydTY$SUJJC$9} zrmzpP^I62kv-h&I*vHu{_7S$6eU^=3m#_uwCN`a2#g?+$*hF?2Tg*1Hx$IiDlHI{Z zvv;x6*^O)(yOJ$ox3URr4J%_CSSee}Dp)le$KK5rv71>5t7PS@icMx$uyfg`*nD;! zTg~po5%>>y3Vsw%!Vlnin8)MsJ$NSmGtR^h<3;!xJP!X6&%lr2$@ourA$}T9#P{Pl z_(`0DS-cqk1xMjK@ie>vr{ah30=xxJ!1v+V_z9ei*We2L9FE0HaUrh98F)1=!`pEZ zUXB^e;XGW2t8f#J#CPDScs)+R590Y4;dp#6o`oOBS@;oLj-SObcnL1Rn{YZ_g-h`^ zoQRj6Ll9End3X@Mfcfw)Aov;LK@0c7Col`%g2&-U z$by&P5%?C$;X`;9u0ss8!xHd70UUr$a30cO53GVyPzr}(8;n3A7-1P4hhpe~Mi_=% z*avIjEL6fV*a0^o8rtA4aKUtV9X5gpY48H9gs-3k-iNJl1rk6HHP8<-=!OOu0x4Lb z76w28c2GkY;-C}m1|Jl`8?YI^2MJg}34V}+04fMUGMHfnoP@dX4m<^yARk_Vb#M-< z;Um}y5zq{`_!hne-NE019J~Wf<)@<8_}9>Semx5EL6pL$py&DL(S!Vh=nMV}G@qZ3 z-sRs#h)3vW{$~`=$0IGTMfdXeqEGlw&@6rydW(MxJW}uE#sG=Tv6>gW5BjF+Kqz8f|0 z4QPlTLQ-Ceth^P~^0jDyA3zFTf$Y2;sd+UD^I;Un$DvNX6Wz_(Ai{9bi zK~M2dp-cQFl+Wj*SNK=ZI({8G$Dc#hd^P%r{|N2mccNSTt>`_83Xe4Ee@88jxX6fk z5m8e@VR_db&EtN{VQ!~(N8V2NZ@s0rDe!8>Z4U7DZ@JU$RP%3X)@{`TlG|#Jh`4y0 z2>-C7>hs70Y5(~>T7I9#_3HJN=K|wa-KO$wZfvamzmvdUq9dM(h4Af6f_?s(w(0>$!Q+6Bm8n*EX@@o4{QY%fJ5IHBo_onr>M1<;SlZ zD$e|{(NKQ+8_}Q`{3Y%FMPImIxWD4t?^fPle(LM5?pK_>TJqVVuReSKvxqt8+`1m<7MYI^!FSYtREOYa&EnE-}b>x{r^)r<%3UODHaB|hjUef+d`G0LDjA3Rf&V{&Tep7R*$k0ltWLU!`zzcb#_FYuISot|J9N?t?nPKH zcIc&5+%8BFm+GYD+~43qaj9Nf$r)h2=+#LT+}{C-UcI!MYlV2RP-k7l?S^~BLcMh{ zr-NDIL7laNdl?=V59+N|TnA){^*U=gw-+7}>-E-3&IIM+1)WvFb-}aZ1--SJYk?Rs zLl<6wok;I4&K4fV1l72UvxGn4FHyg{SSqmiGLpG_GKGimr>NWAlP#>l7g2+|Cren3 z&!8c9k5s6`H;~lbm?=Dny~yfr%oZNO7f`LcF-us5Pon{Mqf}UnuOWqdI8#`OPawN{ zI9sU26sg_AS%MM|qOf~dDy+k!D9)Xmsd@lAP^UXLTlFyBDJGc})9AX8Sby zDRdK?$zDj$z!0&S?Ng~2tb{Y!3+QPWAe`Agot^+Yp(c9~9fUBUX7v>6fKHNUQqQ1Y zfRE&v)zj!F@CNBJsSD}1@IC1=tEW;ASV)~oT|iHPpVXPv)9G;#$e>AGM9)Hq44T7J zs0+-b$`qbKzk-vb${e0X`{5nZYYG?A0k}kZ&Ecuk2d|JOQ@DWo;T&l)ho{q%@DUj` zg^TDph$OKF=}ayFcaT`4bPo3uOeGx#>1^%?SWh~PQil5lQpi$+bQbp=JV=%rrN!J; zm`}V0sf_zKAmTMj=W@S7JSj9-XL1DYC51-o9PTpAA_ooD+1y2VoE$V-8SVyTk$Qu5 z7Iy(2A@xRUF?S8h$pwQ|#!+~dTrgVaa-$GKG7PmdX*Ejl6{iY+#v72{SDYd|h95#T zzT!0DNxTL1`+6n`8}NNd=Iconp1@C_ZeLG|P>vXH0c#pbQh$F%XNx!`Zri9F4rq6 zsR2sF6FP;0{vEc8C-jPH+6oCGqq8rfyP-y8^!CM62Qty7vsciUp+U6i?Nzh`q#~!Y zm(#saD{^{!B{hLUq&mBTc7a-?dV4i(fjBWwr(Q&#hr7i*y?Qa#LXp^|Q&-TJV6)hz zS69(?kcf3UbvfMwO0iC_uB1kgi-S6~g6;#AIH*@w(>6#Jt90Q-^aWTUR_ViwsUGHv zy}ED(wZcZ1=fj8`fw#RL$x@n3oGa=uv3iHNat~{LW>xyl`iD| z2@bJCBVE9~39pGA+TSjF0tCgS8tHuQAMm`mR4XmzK7cPouSQzJy$$b*UaeHl{R@5; z3pLhx+9u4p0E_Zj2b*Wxx#XM6b*TL@`XEa2arQk}NkWis@A-Atww98T~hGB`1uE zx%5{^AdJC2lM<*QjL|-aUIrPl8SJy^MQ9*4qn)8QKuS1+eHOg{wS+U;i|I8`5Nfc? 
diff --git a/c-blosc/compat/filegen.c b/c-blosc/compat/filegen.c
new file mode 100644
--- /dev/null
+++ b/c-blosc/compat/filegen.c
@@ -0,0 +1,110 @@
[opening lines of the license banner lost in extraction]
+
+  See LICENSES/BLOSC.txt for details about copyright and rights to use.
+**********************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <blosc.h>
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+  #include <windows.h>
+  /* stdint.h only available in VS2010 (VC++ 16.0) and newer */
+  #if defined(_MSC_VER) && _MSC_VER < 1600
+    #include "win32/stdint-windows.h"
+  #else
+    #include <stdint.h>
+  #endif
+#else
+  #include <stdint.h>
+#endif  /* _WIN32 */
+
+#ifdef __HAIKU__
+/* int32_t declared here */
+#include <stdint.h>
+#endif
+
+#define SIZE (1000 * 1000)
+
+
+int main(int argc, char *argv[]) {
+  static int32_t data[SIZE];
+  static int32_t data_out[SIZE];
+  static int32_t data_dest[SIZE];
+  size_t isize = SIZE * sizeof(int32_t);
+  size_t osize = SIZE * sizeof(int32_t);
+  int dsize = SIZE * sizeof(int32_t);
+  int csize;
+  long fsize;
+  int i;
+
+  FILE *f;
+
+  /* Register the filter with the library */
+  printf("Blosc version info: %s (%s)\n", BLOSC_VERSION_STRING, BLOSC_VERSION_DATE);
+
+  /* Initialize the Blosc compressor */
+  blosc_init();
+
+  /* Use the argv[2] compressor. The supported ones are "blosclz",
+  "lz4", "lz4hc", "snappy", "zlib" and "zstd"*/
+  blosc_set_compressor(argv[2]);
+
+  if (strcmp(argv[1], "compress") == 0) {
+
+    for (i = 0; i < SIZE; i++) {
+      data[i] = i;
+    }
+
+    /* Compress with clevel=9 and shuffle active */
+    csize = blosc_compress(9, 1, sizeof(int32_t), isize, data, data_out, osize);
+    if (csize == 0) {
+      printf("Buffer is uncompressible. Giving up.\n");
+      return 1;
+    } else if (csize < 0) {
+      printf("Compression error. Error code: %d\n", csize);
+      return csize;
+    }
+
+    printf("Compression: %d -> %d (%.1fx)\n", (int) isize, csize, (1. * isize) / csize);
+
+    /* Write data_out to argv[3] */
+    f = fopen(argv[3], "wb+");
+    if (fwrite(data_out, 1, (size_t) csize, f) == (size_t) csize) {
+      printf("Wrote %s\n", argv[3]);
+    } else {
+      printf("Write failed");
+    }
+  } else {
+    /* Read from argv[2] into data_out. */
+    f = fopen(argv[2], "rb");
+    fseek(f, 0, SEEK_END);
+    fsize = ftell(f);
+    fseek(f, 0, SEEK_SET);
+    if (fread(data_out, 1, (size_t) fsize, f) == (size_t) fsize) {
+      printf("Checking %s\n", argv[2]);
+    } else {
+      printf("Read failed");
+    }
+
+    /* Decompress */
+    dsize = blosc_decompress(data_out, data_dest, (size_t) dsize);
+    if (dsize < 0) {
+      printf("Decompression error. Error code: %d\n", dsize);
+      return dsize;
+    }
+
+    printf("Decompression successful!\n");
+  }
+
+  /* After using it, destroy the Blosc environment */
+  blosc_destroy();
+
+  return 0;
+}
diff --git a/c-blosc/conanfile.py b/c-blosc/conanfile.py
new file mode 100644
index 0000000..a0a795b
--- /dev/null
+++ b/c-blosc/conanfile.py
@@ -0,0 +1,73 @@
+import os
+from conans import ConanFile, CMake, tools
+
+
+class CbloscConan(ConanFile):
+    name = "c-blosc"
+    description = "An extremely fast, multi-threaded, meta-compressor library"
+    license = "BSD"
+    url = "https://github.com/Blosc/c-blosc"
+    settings = "os", "compiler", "build_type", "arch"
+    options = {"shared": [True, False]}
+    default_options = "shared=False"
+    generators = "cmake"
+    exports_sources = "*", "!test_package/*", "!appveyor*", "!.*.yml", "!*.py", "!.*"
+
+    @property
+    def run_tests(self):
+        return "CONAN_RUN_TESTS" in os.environ
+
+    def build(self):
+        os.mkdir("build")
+        tools.replace_in_file("CMakeLists.txt", "project(blosc)", '''project(blosc)
+            include(${CMAKE_BINARY_DIR}/../conanbuildinfo.cmake)
+            conan_basic_setup(NO_OUTPUT_DIRS)''')
+        cmake = CMake(self)
+        cmake.definitions["BUILD_TESTS"] = "ON" if self.run_tests else "OFF"
+        cmake.definitions["BUILD_BENCHMARKS"] = "ON" if self.run_tests else "OFF"
+        cmake.definitions["BUILD_SHARED"] = "ON" if (self.options.shared or self.run_tests) else "OFF"
+        cmake.definitions["BUILD_STATIC"] = "OFF" if self.options.shared else "ON"
+        cmake.configure(build_folder="build")
+        cmake.build()
+
+        if self.run_tests:
+            self.output.warn("Running tests!!")
+            self.launch_tests()
+
+    def launch_tests(self):
+        """Conan will remove rpaths from shared libs to be able to reuse the shared libs, we need
+        to tell the tests where to find the shared libs"""
+        test_args = "-VV" if tools.os_info.is_windows else ""
+        with tools.chdir("build"):
+            outdir = os.path.join(self.build_folder, "build", "blosc")
+            if tools.os_info.is_macos:
+                prefix = "DYLD_LIBRARY_PATH=%s" % outdir
+            elif tools.os_info.is_windows:
+                prefix = "PATH=%s;%%PATH%%" % outdir
+            elif tools.os_info.is_linux:
+                prefix = "LD_LIBRARY_PATH=%s" % outdir
+            else:
+                return
+            self.run("%s ctest %s" % (prefix, test_args))
+
+    def package(self):
+        self.copy("blosc.h", dst="include", src="blosc")
+        self.copy("blosc-export.h", dst="include", src="blosc")
+        self.copy("*libblosc.a", dst="lib", keep_path=False)
+
+        if self.options.shared:
+            self.copy("*/blosc.lib", dst="lib", keep_path=False)
+            self.copy("*blosc.dll", dst="bin", keep_path=False)
+            self.copy("*blosc.*dylib*", dst="lib", keep_path=False, symlinks=True)
+            self.copy("*blosc.so*", dst="lib", keep_path=False, symlinks=True)
+            self.copy("*libblosc.dll.a", dst="lib", keep_path=False)  # Mingw
+        else:
+            self.copy("*libblosc.lib", dst="lib", src="", keep_path=False)
+
+    def package_info(self):
+        if self.settings.compiler == "Visual Studio" and not self.options.shared:
+            self.cpp_info.libs = ["libblosc"]
+        else:
+            self.cpp_info.libs = ["blosc"]
+        if self.settings.os == "Linux":
+            self.cpp_info.libs.append("pthread")
diff --git a/c-blosc/examples/README.rst b/c-blosc/examples/README.rst
new file mode 100644
index 0000000..843473c
--- /dev/null
+++ b/c-blosc/examples/README.rst
@@ -0,0 +1,12 @@
+Examples on how to add Blosc support for your programs
+======================================================
+
+In this directory you can find a series of examples on how to link
+your apps with the Blosc library:
+
+* simple.c -- The simplest way to add Blosc to your app
+* multithread.c -- Add multithreading
into the equation +* many_compressors.c -- Use different compressors inside Blosc + +For more info, please visit the `official API documentation +`_. diff --git a/c-blosc/examples/many_compressors.c b/c-blosc/examples/many_compressors.c new file mode 100644 index 0000000..cafc79e --- /dev/null +++ b/c-blosc/examples/many_compressors.c @@ -0,0 +1,117 @@ +/* + Example program demonstrating use of the Blosc filter from C code. + + To compile this program: + + $ gcc many_compressors.c -o many_compressors -lblosc -lpthread + + or, if you don't have the blosc library installed: + + $ gcc -O3 -msse2 many_compressors.c ../blosc/*.c -I../blosc \ + -o many_compressors -lpthread \ + -DHAVE_ZLIB -lz -DHAVE_LZ4 -llz4 -DHAVE_SNAPPY -lsnappy + + Using MSVC on Windows: + + $ cl /Ox /Femany_compressors.exe /Iblosc many_compressors.c blosc\*.c + + To run: + + $ ./many_compressors + Blosc version info: 1.10.0.dev ($Date:: 2016-07-20 #$) + Using 4 threads (previously using 1) + Using blosclz compressor + Compression: 4000000 -> 158788 (25.2x) + Succesful roundtrip! + Using lz4 compressor + Compression: 4000000 -> 235419 (17.0x) + Succesful roundtrip! + Using lz4hc compressor + Compression: 4000000 -> 38314 (104.4x) + Succesful roundtrip! + Using snappy compressor + Compression: 4000000 -> 311617 (12.8x) + Succesful roundtrip! + Using zlib compressor + Compression: 4000000 -> 22103 (181.0x) + Succesful roundtrip! + Using zstd compressor + Compression: 4000000 -> 11813 (338.6x) + Succesful roundtrip! + +*/ + +#include +#include + +#define SIZE 100*100*100 +#define SHAPE {100,100,100} +#define CHUNKSHAPE {1,100,100} + +int main(){ + static float data[SIZE]; + static float data_out[SIZE]; + static float data_dest[SIZE]; + int isize = SIZE*sizeof(float), osize = SIZE*sizeof(float); + int dsize = SIZE*sizeof(float), csize; + int nthreads, pnthreads, i; + char* compressors[] = {"blosclz", "lz4", "lz4hc", "snappy", "zlib", "zstd"}; + int ccode, rcode; + + for(i=0; i %d (%.1fx)\n", isize, csize, (1.*isize) / csize); + + /* Decompress */ + dsize = blosc_decompress(data_out, data_dest, dsize); + if (dsize < 0) { + printf("Decompression error. Error code: %d\n", dsize); + return dsize; + } + + /* After using it, destroy the Blosc environment */ + blosc_destroy(); + + for(i=0;i 158494 (25.2x) + Succesful roundtrip! + Using 2 threads (previously using 1) + Compression: 4000000 -> 158494 (25.2x) + Succesful roundtrip! + Using 3 threads (previously using 2) + Compression: 4000000 -> 158494 (25.2x) + Succesful roundtrip! + Using 4 threads (previously using 3) + Compression: 4000000 -> 158494 (25.2x) + Succesful roundtrip! + +*/ + +#include +#include + +#define SIZE 1000*1000 + + +int main(){ + static float data[SIZE]; + static float data_out[SIZE]; + static float data_dest[SIZE]; + int isize = SIZE*sizeof(float), osize = SIZE*sizeof(float); + int dsize = SIZE*sizeof(float), csize; + int nthreads, pnthreads, i; + + for(i=0; i %d (%.1fx)\n", isize, csize, (1.*isize) / csize); + + /* Decompress */ + dsize = blosc_decompress(data_out, data_dest, dsize); + if (dsize < 0) { + printf("Decompression error. Error code: %d\n", dsize); + return dsize; + } + + for(i=0;i 158788 (25.2x) + Decompression succesful! + Succesful roundtrip! 
+ +*/ + +#include +#include + +#define SIZE 100*100*100 + +int main(){ + static float data[SIZE]; + static float data_out[SIZE]; + static float data_dest[SIZE]; + int isize = SIZE*sizeof(float), osize = SIZE*sizeof(float); + int dsize = SIZE*sizeof(float), csize; + int i; + + for(i=0; i %d (%.1fx)\n", isize, csize, (1.*isize) / csize); + + /* Decompress */ + dsize = blosc_decompress(data_out, data_dest, dsize); + if (dsize < 0) { + printf("Decompression error. Error code: %d\n", dsize); + return dsize; + } + + printf("Decompression succesful!\n"); + + for(i=0;i 158494 (25.2x) + Decompression succesful! + Succesful roundtrip! + +*/ + +#include +#include + +#define SIZE 100*100*100 + +int main(){ + static float data[SIZE]; + static float data_out[SIZE]; + static float data_dest[SIZE]; + int isize = SIZE*sizeof(float), osize = SIZE*sizeof(float); + int dsize = SIZE*sizeof(float), csize; + int i; + + for(i=0; i %d (%.1fx)\n", isize, csize, (1.*isize) / csize); + + /* Decompress */ + dsize = blosc_decompress(data_out, data_dest, dsize); + if (dsize < 0) { + printf("Decompression error. Error code: %d\n", dsize); + return dsize; + } + + printf("Decompression succesful!\n"); + + /* After using it, destroy the Blosc environment */ + blosc_destroy(); + + for(i=0;i 19928862 (20.1x) + Decompression succesful! + Succesful roundtrip! + +*/ + +#include +#include +#include + +#define SIZE 100*1000*1000 +#define SHAPE {100,1000,1000} +#define CHUNKSHAPE {1,1000,1000} + +/* Definition for the compression and decompression blosc routines */ +typedef int (__cdecl *COMPRESS_CTX)(int clevel, int doshuffle, size_t typesize, + size_t nbytes, const void* src, void* dest, + size_t destsize, const char* compressor, + size_t blocksize, int numinternalthreads); + +typedef int (__cdecl *DECOMPRESS_CTX)(const void *src, void *dest, + size_t destsize, int numinternalthreads); +typedef char* (__cdecl *GET_VERSION_STRING)(void); + + +int main(){ + HINSTANCE BDLL; /* Handle to DLL */ + COMPRESS_CTX blosc_compress_ctx; /* Function pointer for compression */ + DECOMPRESS_CTX blosc_decompress_ctx; /* Function pointer for decompression */ + GET_VERSION_STRING blosc_get_version_string; + + static float data[SIZE]; + static float data_out[SIZE]; + static float data_dest[SIZE]; + int isize = SIZE*sizeof(float), osize = SIZE*sizeof(float); + int dsize = SIZE*sizeof(float), csize; + int i; + + BDLL = LoadLibrary(TEXT("myblosc.dll")); + if (BDLL == NULL) { + printf("Cannot find myblosc.dll library!\n"); + goto out; + } + + blosc_compress_ctx = (COMPRESS_CTX)GetProcAddress(BDLL, "blosc_compress_ctx"); + if (!blosc_compress_ctx) { + // handle the error + printf("Cannot find blosc_compress_ctx() function!\n"); + goto out; + } + + blosc_decompress_ctx = (DECOMPRESS_CTX)GetProcAddress(BDLL, "blosc_decompress_ctx"); + if (!blosc_decompress_ctx) { + // handle the error + printf("Cannot find blosc_decompress_ctx() function!\n"); + goto out; + } + + blosc_get_version_string = (GET_VERSION_STRING)GetProcAddress(BDLL, "blosc_get_version_string"); + if (!blosc_get_version_string) { + // handle the error + printf("Cannot find blosc_get_version_string() function!\n"); + goto out; + } + + for(i=0; i %d (%.1fx)\n", isize, csize, (1.*isize) / csize); + + /* Decompress */ + dsize = blosc_decompress_ctx(data_out, data_dest, dsize, 1); + if (dsize < 0) { + printf("Decompression error. 
Error code: %d\n", dsize); + return dsize; + } + + printf("Decompression succesful!\n"); + + for(i=0;i 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +#include "lz4.h" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# include +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# ifdef _MSC_VER /* Visual Studio */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE + * Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy does not affect the compression speed. 
+ */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) +# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2"))) +# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE +#else +# define LZ4_FORCE_O2_GCC_PPC64LE +# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +/*-************************************ +* Memory routines +**************************************/ +#include /* malloc, calloc, free */ +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include /* memset, memcpy */ +#define MEM_INIT memset + + +/*-************************************ +* Basic Types +**************************************/ +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +/*-************************************ +* Reading and writing into memory +**************************************/ +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; + +static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } + +#else /* safe and portable access through memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t 
val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +static void LZ4_copy8(void* dst, const void* src) +{ + memcpy(dst,src,8); +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_O2_INLINE_GCC_PPC64LE +void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_copy8(d,s); d+=8; s+=8; } while (d=1) +# include +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) +# include +static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + + +/*-************************************ +* Common functions +**************************************/ +static unsigned LZ4_NbCommonBytes (reg_t val) +{ + if (LZ4_isLittleEndian()) { + if (sizeof(val)==8) { +# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (int)(r>>3); +# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else /* 32 bits */ { +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, (U32)val ); + return (int)(r>>3); +# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { /* 64-bits */ +# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll((U64)val) >> 3); +# else + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 
16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. */ + unsigned r; + if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else /* 32 bits */ { +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } + } +} + +#define STEPSIZE sizeof(reg_t) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + + if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn compression run slower on incompressible data */ + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive; +typedef enum { byPtr, byU32, byU16 } tableType_t; + +typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { full = 0, partial = 1 } earlyEnd_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } +int LZ4_sizeofState() { return LZ4_STREAMSIZE; } + + +/*-****************************** +* Compression functions +********************************/ +static U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +static U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + static const U64 prime5bytes = 889523592379ULL; + static const U64 prime8bytes = 11400714785074694791ULL; + const U32 hashLog = (tableType == byU16) ? 
LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + else + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + return LZ4_hash4(LZ4_read32(p), tableType); +} + +static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) +{ + switch (tableType) + { + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } + if (tableType == byU32) { const U32* const hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } + { const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + + +/** LZ4_compress_generic() : + inlined, to ensure branches are decided at compilation time */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + const int maxOutputSize, + const limitedOutput_directive outputLimited, + const tableType_t tableType, + const dict_directive dict, + const dictIssue_directive dictIssue, + const U32 acceleration) +{ + const BYTE* ip = (const BYTE*) source; + const BYTE* base; + const BYTE* lowLimit; + const BYTE* const lowRefLimit = ip - cctx->dictSize; + const BYTE* const dictionary = cctx->dictionary; + const BYTE* const dictEnd = dictionary + cctx->dictSize; + const ptrdiff_t dictDelta = dictEnd - (const BYTE*)source; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 forwardH; + + /* Init conditions */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported inputSize, too large (or negative) */ + switch(dict) + { + case noDict: + default: + base = (const BYTE*)source; + lowLimit = (const BYTE*)source; + break; + case withPrefix64k: + base = (const BYTE*)source - cctx->currentOffset; + lowLimit = (const BYTE*)source - cctx->dictSize; + break; + case usingExtDict: + base = (const BYTE*)source - cctx->currentOffset; + lowLimit = (const BYTE*)source; + break; + } + if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ + if (inputSizehashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, 
tableType); + + /* Main Loop */ + for ( ; ; ) { + ptrdiff_t refDelta = 0; + const BYTE* match; + BYTE* token; + + /* Find a match */ + { const BYTE* forwardIp = ip; + unsigned step = 1; + unsigned searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimit)) goto _last_literals; + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); + if (dict==usingExtDict) { + if (match < (const BYTE*)source) { + refDelta = dictDelta; + lowLimit = dictionary; + } else { + refDelta = 0; + lowLimit = (const BYTE*)source; + } } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); + + } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0) + || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) + || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) ); + } + + /* Catch up */ + while (((ip>anchor) & (match+refDelta > lowLimit)) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputLimited) && /* Check output buffer overflow */ + (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) + return 0; + if (litLength >= RUN_MASK) { + int len = (int)litLength-RUN_MASK; + *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip += MINMATCH + matchCode; + if (ip==limit) { + unsigned const more = LZ4_count(ip, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += MINMATCH + matchCode; + } + + if ( outputLimited && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) + return 0; + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + + anchor = ip; + + /* Test end of chunk */ + if (ip > mflimit) break; + + /* Fill table */ + LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); + + /* Test next position */ + match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); + if (dict==usingExtDict) { + if (match < (const BYTE*)source) { + refDelta = dictDelta; + lowLimit = dictionary; + } else { + refDelta = 0; + lowLimit = (const BYTE*)source; + } } + LZ4_putPosition(ip, cctx->hashTable, tableType, base); + if ( ((dictIssue==dictSmall) ? 
(match>=lowRefLimit) : 1) + && (match+MAX_DISTANCE>=ip) + && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + } + +_last_literals: + /* Encode Last Literals */ + { size_t const lastRun = (size_t)(iend - anchor); + if ( (outputLimited) && /* Check output buffer overflow */ + ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) ) + return 0; + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun<internal_donotuse; + LZ4_resetStream((LZ4_stream_t*)state); + if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; + + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) + return LZ4_compress_generic(ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, acceleration); + else + return LZ4_compress_generic(ctx, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration); + } else { + if (inputSize < LZ4_64Klimit) + return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + else + return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration); + } +} + + +int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ +#if (LZ4_HEAPMODE) + void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ +#else + LZ4_stream_t ctx; + void* const ctxPtr = &ctx; +#endif + + int const result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1); +} + + +/* hidden debug function */ +/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */ +int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t ctx; + LZ4_resetStream(&ctx); + + if (inputSize < LZ4_64Klimit) + return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + else + return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, sizeof(void*)==8 ? 
byU32 : byPtr, noDict, noDictIssue, acceleration); +} + + +/*-****************************** +* *_destSize() variant +********************************/ + +static int LZ4_compress_destSize_generic( + LZ4_stream_t_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + const int targetDstSize, + const tableType_t tableType) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* base = (const BYTE*) src; + const BYTE* lowLimit = (const BYTE*) src; + const BYTE* anchor = ip; + const BYTE* const iend = ip + *srcSizePtr; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + targetDstSize; + BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */; + BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */); + BYTE* const oMaxSeq = oMaxLit - 1 /* token */; + + U32 forwardH; + + + /* Init conditions */ + if (targetDstSize < 1) return 0; /* Impossible to store anything */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ + if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ + if (*srcSizePtrhashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + + /* Find a match */ + { const BYTE* forwardIp = ip; + unsigned step = 1; + unsigned searchMatchNb = 1 << LZ4_skipTrigger; + + do { + U32 h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimit)) goto _last_literals; + + match = LZ4_getPositionOnHash(h, ctx->hashTable, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, ctx->hashTable, tableType, base); + + } while ( ((tableType==byU16) ? 
0 : (match + MAX_DISTANCE < ip)) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + } + + /* Catch up */ + while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } + + /* Encode Literal length */ + { unsigned litLength = (unsigned)(ip - anchor); + token = op++; + if (op + ((litLength+240)/255) + litLength > oMaxLit) { + /* Not enough space for a last match */ + op--; + goto _last_literals; + } + if (litLength>=RUN_MASK) { + unsigned len = litLength - RUN_MASK; + *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< oMaxMatch) { + /* Match description too long : reduce it */ + matchLength = (15-1) + (oMaxMatch-op) * 255; + } + ip += MINMATCH + matchLength; + + if (matchLength>=ML_MASK) { + *token += ML_MASK; + matchLength -= ML_MASK; + while (matchLength >= 255) { matchLength-=255; *op++ = 255; } + *op++ = (BYTE)matchLength; + } + else *token += (BYTE)(matchLength); + } + + anchor = ip; + + /* Test end of block */ + if (ip > mflimit) break; + if (op > oMaxSeq) break; + + /* Fill table */ + LZ4_putPosition(ip-2, ctx->hashTable, tableType, base); + + /* Test next position */ + match = LZ4_getPosition(ip, ctx->hashTable, tableType, base); + LZ4_putPosition(ip, ctx->hashTable, tableType, base); + if ( (match+MAX_DISTANCE>=ip) + && (LZ4_read32(match)==LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); + if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) { + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (oend-op) - 1; + lastRunSize -= (lastRunSize+240)/255; + } + ip = anchor + lastRunSize; + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize<= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) + return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, byU16); + else + return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, sizeof(void*)==8 ? 
+
+
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+#else
+    LZ4_stream_t ctxBody;
+    LZ4_stream_t* ctx = &ctxBody;
+#endif
+
+    int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
+
+#if (LZ4_HEAPMODE)
+    FREEMEM(ctx);
+#endif
+    return result;
+}
+
+
+
+/*-******************************
+*  Streaming functions
+********************************/
+
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    LZ4_resetStream(lz4s);
+    return lz4s;
+}
+
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    DEBUGLOG(4, "LZ4_resetStream");
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    if (!LZ4_stream) return 0;   /* support free on NULL */
+    FREEMEM(LZ4_stream);
+    return (0);
+}
+
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
+    const BYTE* p = (const BYTE*)dictionary;
+    const BYTE* const dictEnd = p + dictSize;
+    const BYTE* base;
+
+    if ((dict->initCheck) || (dict->currentOffset > 1 GB))  /* Uninitialized structure, or reuse overflow */
+        LZ4_resetStream(LZ4_dict);
+
+    if (dictSize < (int)HASH_UNIT) {
+        dict->dictionary = NULL;
+        dict->dictSize = 0;
+        return 0;
+    }
+
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+    dict->currentOffset += 64 KB;
+    base = p - dict->currentOffset;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->currentOffset += dict->dictSize;
+
+    while (p <= dictEnd-HASH_UNIT) {
+        LZ4_putPosition(p, dict->hashTable, byU32, base);
+        p+=3;
+    }
+
+    return dict->dictSize;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+{
+    if ((LZ4_dict->currentOffset > 0x80000000) ||
+        ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {   /* address space overflow */
+        /* rescale hash table */
+        U32 const delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int i;
+        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = (const BYTE*) source;
+    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+    LZ4_renormDictT(streamPtr, smallest);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    /* Check overlapping input/dictionary space */
+    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64
KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == (const BYTE*)source) { + int result; + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration); + else + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration); + streamPtr->dictSize += (U32)inputSize; + streamPtr->currentOffset += (U32)inputSize; + return result; + } + + /* external dictionary mode */ + { int result; + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration); + else + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration); + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + streamPtr->currentOffset += (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize) +{ + LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; + int result; + const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; + + const BYTE* smallest = dictEnd; + if (smallest > (const BYTE*) source) smallest = (const BYTE*) source; + LZ4_renormDictT(streamPtr, smallest); + + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + streamPtr->currentOffset += (U32)inputSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). + * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). + * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + + if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize; + + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-***************************** +* Decompression functions +*******************************/ +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. 
+ */
+LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_INLINE int LZ4_decompress_generic(
+                 const char* const src,
+                 char* const dst,
+                 int srcSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+
+    BYTE* op = (BYTE*) dst;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
+    const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                      /* targetOutputSize too high => just decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+    /* Main Loop : decode sequences */
+    while (1) {
+        size_t length;
+        const BYTE* match;
+        size_t offset;
+
+        unsigned const token = *ip++;
+
+        /* shortcut for common case :
+         * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes).
+         * this shortcut was tested on x86 and x64, where it improves decoding speed.
+         * it has not yet been benchmarked on ARM, Power, mips, etc. */
+        if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend)
+          & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend))
+          & ((token < (15<<ML_BITS)) & ((token & ML_MASK) != 15)) ) {
+            size_t const ll = token >> ML_BITS;
+            size_t const off = LZ4_readLE16(ip+ll);
+            const BYTE* const matchPtr = op + ll - off;  /* pointer underflow risk ? */
+            if ((off >= 18) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) {
+                size_t const ml = (token & ML_MASK) + MINMATCH;
+                memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/;
+                memcpy(op, matchPtr, 18); op += ml;
+                continue;
+            }
+        }
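+        /* note (editorial addition, not upstream code) : each LZ4 sequence starts
+         * with a 1-byte token : the high 4 bits give the literal length, the low
+         * 4 bits give the match length minus MINMATCH. A field value of 15 means
+         * the length continues in extra bytes of 255 each, ended by a byte < 255.
+         * e.g. token 0x52 = 5 literals, then a match of 2 + MINMATCH = 6 bytes. */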
+        /* decode literal length */
+        if ((length=(token>>ML_BITS)) == RUN_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                length += s;
+            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
+            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+        {
+            if (partialDecoding) {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            } else {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memcpy(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+        LZ4_write32(op, (U32)offset);   /* costs ~1%; silence an msan warning when offset==0 */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix)) {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match)) {
+                /* match can be copied as a single segment from external dictionary */
+                memmove(op, dictEnd - (lowPrefix-match), length);
+                op += length;
+            } else {
+                /* match encompass external dictionary and current block */
+                size_t const copySize = (size_t)(lowPrefix-match);
+                size_t const restSize = length - copySize;
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                if (restSize > (size_t)(op-lowPrefix)) {  /* overlap copy */
+                    BYTE* const endOfMatch = op + restSize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                } else {
+                    memcpy(op, lowPrefix, restSize);
+                    op += restSize;
+            }   }
+            continue;
+        }
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8)) {
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += inc32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64table[offset];
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12)) {
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit) {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op<cpy) *op++ = *match++;
+        } else {
+            LZ4_copy8(op, match);
+            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
+        }
+        op = cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
+    else
+       return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
+_output_error:
+    return (int) (-(((const char*)ip)-src))-1;
+}
+
+
+LZ4_FORCE_O2_GCC_PPC64LE
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB); +} + + +/*===== streaming decompression functions =====*/ + +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t)); + return lz4s; +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} + +/*! + * LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * Return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t) dictSize; + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. 
+ If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where it stands using LZ4_setStreamDecode() +*/ +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixEnd == (BYTE*)dest) { + result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, full, 0, + usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += result; + lz4sd->prefixEnd += result; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, full, 0, + usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } + + return result; +} + +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixEnd == (BYTE*)dest) { + result = LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, + usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += originalSize; + lz4sd->prefixEnd += originalSize; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, + usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } + + return result; +} + + +/* +Advanced decoding functions : +*_usingDict() : + These decoding functions work the same as "_continue" ones, + the dictionary must be explicitly provided within parameters +*/ + +LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0); + if (dictStart+dictSize == dest) { + if (dictSize >= (int)(64 KB - 1)) + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0); + } + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize); +} + +LZ4_FORCE_O2_GCC_PPC64LE +int 
LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +{ + return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize); +} + +/* debug function */ +LZ4_FORCE_O2_GCC_PPC64LE +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + + +/*=************************************************* +* Obsolete Functions +***************************************************/ +/* obsolete compression functions */ +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); } +int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); } +int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); } +int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); } +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); } +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); } + +/* +These function names are deprecated and should no longer be used. +They are only provided here for compatibility with older user programs. 
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast +- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } + + +/* Obsolete Streaming functions */ + +int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } + +static void LZ4_init(LZ4_stream_t* lz4ds, BYTE* base) +{ + MEM_INIT(lz4ds, 0, sizeof(LZ4_stream_t)); + lz4ds->internal_donotuse.bufferStart = base; +} + +int LZ4_resetStreamState(void* state, char* inputBuffer) +{ + if ((((uptrval)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ + LZ4_init((LZ4_stream_t*)state, (BYTE*)inputBuffer); + return 0; +} + +void* LZ4_create (char* inputBuffer) +{ + LZ4_stream_t* lz4ds = (LZ4_stream_t*)ALLOCATOR(8, sizeof(LZ4_stream_t)); + LZ4_init (lz4ds, (BYTE*)inputBuffer); + return lz4ds; +} + +char* LZ4_slideInputBuffer (void* LZ4_Data) +{ + LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)LZ4_Data)->internal_donotuse; + int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB); + return (char*)(ctx->bufferStart + dictSize); +} + +/* Obsolete streaming decompression functions */ + +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); +} + +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); +} + +#endif /* LZ4_COMMONDEFS_ONLY */ diff --git a/c-blosc/internal-complibs/lz4-1.8.1.2/lz4.h b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4.h new file mode 100644 index 0000000..a06b8a4 --- /dev/null +++ b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4.h @@ -0,0 +1,479 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-2017, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h>   /* size_t */
+
+
+/**
+  Introduction
+
+  LZ4 is a lossless compression algorithm, providing compression speed of 400 MB/s per core,
+  scalable with multi-core CPUs. It features an extremely fast decoder, with speed in
+  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+  The LZ4 compression library provides in-memory compression and decompression functions.
+  Compression can be done in:
+    - a single step (described as Simple Functions)
+    - a single step, reusing a context (described in Advanced Functions)
+    - unbounded multiple steps (described as Streaming compression)
+
+  lz4.h provides block compression functions. It gives full buffer control to the user.
+  Decompressing an lz4-compressed block also requires metadata (such as compressed size).
+  Each application is free to encode such metadata in whichever way it wants.
+
+  An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
+  takes care of encoding standard metadata alongside LZ4-compressed blocks.
+  If your application requires interoperability, it's recommended to use it.
+  A library is provided to take care of it, see lz4frame.h.
+*/
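+/* Illustrative sketch (editorial addition, not upstream code) : since block
+ * decompression needs the compressed size, a minimal application-side framing
+ * can simply length-prefix each block, e.g. :
+ *
+ *   int csize = LZ4_compress_default(src, dst+4, srcSize, dstCapacity-4);
+ *   memcpy(dst, &csize, 4);     // store the compressed size before the block
+ *
+ * The LZ4 frame format (lz4frame.h) is the interoperable way to do this.
+ */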
+/*^***************************************************************
+*  Export parameters
+*****************************************************************/
+/*
+*  LZ4_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*  LZ4LIB_VISIBILITY :
+*  Control library symbols visibility.
+*/
+#ifndef LZ4LIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4LIB_VISIBILITY
+#  endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+#  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+#  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define LZ4LIB_API LZ4LIB_VISIBILITY
+#endif
+
+/*------   Version   ------*/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    8    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+#define LZ4_QUOTE(str) #str
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
+
+LZ4LIB_API int LZ4_versionNumber (void);           /**< library version number; to be used when checking dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; to be used when checking dll version */
+
+
+/*-************************************
+*  Tuning parameter
+**************************************/
+/*!
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio.
+ * Reduced memory usage can improve speed, due to cache effect.
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE 14
+#endif
+
+/*-************************************
+*  Simple Functions
+**************************************/
+/*! LZ4_compress_default() :
+    Compresses 'srcSize' bytes from buffer 'src'
+    into already allocated 'dst' buffer of size 'dstCapacity'.
+    Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'src' into a limited 'dst' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dst' content is not valid.
+    This function never writes outside 'dst' buffer, nor reads outside 'source' buffer.
+        srcSize : supported max value is LZ4_MAX_INPUT_SIZE
+        dstCapacity : full or partial size of buffer 'dst' (which must be already allocated)
+        return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+                  or 0 if compression fails */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
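+/* Illustrative sketch (editorial addition, not upstream code) : a minimal
+ * compress / decompress round-trip with the two simple functions declared
+ * around this note. The 1024-byte input size is an assumption for the example.
+ *
+ *   char compressed[LZ4_COMPRESSBOUND(1024)];
+ *   int csize = LZ4_compress_default(input, compressed, 1024, (int)sizeof(compressed));
+ *   char restored[1024];
+ *   int dsize = LZ4_decompress_safe(compressed, restored, csize, (int)sizeof(restored));
+ *   // expect csize > 0 and dsize == 1024 on success; a negative dsize means
+ *   // the compressed data was malformed.
+ */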
+/*! LZ4_decompress_safe() :
+    compressedSize : is the exact complete size of the compressed block.
+    dstCapacity : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+             If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
+*/
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*!
+LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compresses faster when dest buffer size is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+*/
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*!
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows selection of an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+*/
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*!
+LZ4_compress_fast_extState() :
+    Same compression function, just using an externally allocated memory space to store compression state.
+    Use LZ4_sizeofState() to know how much memory must be allocated,
+    and allocate it on 8-byte boundaries (using malloc() typically).
+    Then, provide it as 'void* state' to compression function.
+*/
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*!
+LZ4_compress_destSize() :
+    Reverse the logic : compresses as much data as possible from 'src' buffer
+    into already allocated buffer 'dst' of size 'targetDestSize'.
+    This function either compresses the entire 'src' content into 'dst' if it's large enough,
+    or fills 'dst' buffer completely with as much data as possible from 'src'.
+        *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+                      New value is necessarily <= old value.
+        return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
+              or 0 if compression fails
+*/
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
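+/* Worked example (editorial addition, not upstream code) : for a 1 MB input,
+ * LZ4_COMPRESSBOUND(1048576) = 1048576 + 1048576/255 + 16
+ *                            = 1048576 + 4112 + 16 = 1052704 bytes,
+ * i.e. incompressible data expands by well under 0.5% in the worst case.
+ */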
+/*!
+LZ4_decompress_fast() : (unsafe!!)
+    originalSize : is the original uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+    Destination buffer must be already allocated. Its size must be >= 'originalSize' bytes.
+    note : This function respects memory boundaries for *properly formed* compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+
+/*!
+LZ4_decompress_safe_partial() :
+    This function decompresses a compressed block of size 'srcSize' at position 'src'
+    into destination buffer 'dst' of size 'dstCapacity'.
+    The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that.
+    However, it's not accurate, and may write more than 'targetOutputSize' (but <= dstCapacity).
+   @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity)
+       Note : this number can be < 'targetOutputSize' should the compressed block contain less data.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets.
+*/
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+
+
+/*-*********************************************
+*  Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t;   /* incomplete type (defined later) */
+
+/*! LZ4_createStream() and LZ4_freeStream() :
+ *  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
+ *  LZ4_freeStream() releases its memory.
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_resetStream() :
+ *  An LZ4_stream_t structure can be allocated once and re-used multiple times.
+ *  Use this function to start compressing a new stream.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ *  Use this function to load a static dictionary into LZ4_stream_t.
+ *  Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ *  Loading a size of 0 is allowed, and is the same as reset.
+ * @return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ *  Compress 'src' content using data from previously compressed blocks, improving compression ratio.
+ *  'dst' buffer must be already allocated.
+ *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ *  Important : Up to 64KB of previously compressed data is assumed to remain present and unmodified in memory !
+ *  Special 1 : If input buffer is a double-buffer, it can have any size, including < 64 KB.
+ *  Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ * @return : size of compressed block
+ *           or 0 if there is an error (typically, compressed data cannot fit into 'dst')
+ *  After an error, the stream status is invalid, it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
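+/* Illustrative sketch (editorial addition, not upstream code) : double-buffer
+ * streaming compression with the functions declared above. The 4 KB chunk size
+ * and the FILE* 'f' are assumptions for the example.
+ *
+ *   LZ4_stream_t* s = LZ4_createStream();
+ *   char inBuf[2][4096];                      // alternating source buffers
+ *   char outBuf[LZ4_COMPRESSBOUND(4096)];
+ *   int i, n;
+ *   for (i = 0; (n = (int)fread(inBuf[i&1], 1, 4096, f)) > 0; i++) {
+ *       int csize = LZ4_compress_fast_continue(s, inBuf[i&1], outBuf, n,
+ *                                              (int)sizeof(outBuf), 1);
+ *       // ... emit csize and outBuf to the output stream ...
+ *   }
+ *   LZ4_freeStream(s);
+ */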
+/*! LZ4_saveDict() :
+ *  If previously compressed data block is not guaranteed to remain available at its current memory location,
+ *  save it into a safer place (char* safeBuffer).
+ *  Note : it's not necessary to call LZ4_loadDict() after LZ4_saveDict(), dictionary is immediately usable.
+ * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+
+
+/*-**********************************************
+*  Streaming Decompression Functions
+*  Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* incomplete type (defined later) */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ *  creation / destruction of streaming decompression tracking structure.
+ *  A tracking structure can be re-used multiple times sequentially. */
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*! LZ4_setStreamDecode() :
+ *  An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
+ *  Use this function to start decompression of a new stream of blocks.
+ *  A dictionary can optionally be set. Use NULL or size 0 for a simple reset order.
+ * @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decompress_*_continue() :
+ *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ *  A block is an unsplittable entity, it must be presented entirely to a decompression function.
+ *  Decompression functions only accept one block at a time.
+ *  Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB).
+ *
+ *  Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
+ *  - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
+ *    In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
+ *  - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *    maxBlockSize is implementation dependent. It's the maximum size of any single block.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *  - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and encoding ring buffer can have any size, including larger than decoding buffer.
+ *  Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
+ *  and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
+*/
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ *  These decoding functions work the same as
+ *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+ *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ */
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+
+
+/*^**********************************************
+ * !!!!!!   STATIC LINKING ONLY   !!!!!!
+ ***********************************************/
+/*-************************************
+ *  Private definitions
+ **************************************
+ *  Do not use these definitions.
+ *  They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ *  Using these definitions will expose code to API and/or ABI break in future versions of the library.
+ **************************************/
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#include <stdint.h>
+
+typedef struct {
+    uint32_t hashTable[LZ4_HASH_SIZE_U32];
+    uint32_t currentOffset;
+    uint32_t initCheck;
+    const uint8_t* dictionary;
+    uint8_t* bufferStart;   /* obsolete, used for slideInputBuffer */
+    uint32_t dictSize;
+} LZ4_stream_t_internal;
+
+typedef struct {
+    const uint8_t* externalDict;
+    size_t extDictSize;
+    const uint8_t* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+#else
+
+typedef struct {
+    unsigned int hashTable[LZ4_HASH_SIZE_U32];
+    unsigned int currentOffset;
+    unsigned int initCheck;
+    const unsigned char* dictionary;
+    unsigned char* bufferStart;   /* obsolete, used for slideInputBuffer */
+    unsigned int dictSize;
+} LZ4_stream_t_internal;
+
+typedef struct {
+    const unsigned char* externalDict;
+    size_t extDictSize;
+    const unsigned char* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+#endif
+
+/*!
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ * init this structure before first use.
+ * note : only use in association with static linking !
+ *        this definition is not API/ABI safe,
+ *        it may change in a future version !
+ */
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+union LZ4_stream_u {
+    unsigned long long table[LZ4_STREAMSIZE_U64];
+    LZ4_stream_t_internal internal_donotuse;
+} ;  /* previously typedef'd to LZ4_stream_t */
+
+
+/*!
+ * LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ * note : only use in association with static linking !
+ *        this definition is not API/ABI safe,
+ *        and may change in a future version !
+ */
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+union LZ4_streamDecode_u {
+    unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+    LZ4_streamDecode_t_internal internal_donotuse;
+} ;   /* previously typedef'd to LZ4_streamDecode_t */
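+/* Illustrative sketch (editorial addition, not upstream code) : the unions
+ * above exist so that a stream state can be allocated statically or on the
+ * stack instead of via LZ4_createStream() :
+ *
+ *   LZ4_stream_t ctx;                        // no heap allocation
+ *   LZ4_resetStream(&ctx);
+ *   int csize = LZ4_compress_fast_continue(&ctx, src, dst, srcSize, dstCapacity, 1);
+ */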
+/*-************************************
+*  Obsolete Functions
+**************************************/
+
+/*! Deprecation warnings
+   Should deprecation warnings be a problem,
+   it is generally possible to disable them,
+   typically with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual.
+   Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
+#else
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined(__clang__) /* clang doesn't handle mixed C++11 and GNU attributes */
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define LZ4_DEPRECATED(message) [[deprecated(message)]]
+#  elif (LZ4_GCC_VERSION >= 405)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/* Obsolete compression functions */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress               (const char* source, char* dest, int sourceSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete decompression functions */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast() instead") int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe() instead") int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions; use new streaming interface whenever possible */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+
+/* Obsolete streaming decoding functions */
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+#endif /* LZ4_H_2983827168210 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.c b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.c
new file mode 100644
index 0000000..f2c2566
--- /dev/null
+++
b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.c @@ -0,0 +1,893 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Copyright (C) 2011-2017, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ + + +/* ************************************* +* Tuning Parameter +***************************************/ + +/*! HEAPMODE : + * Select how default compression function will allocate workplace memory, + * in stack (0:fastest), or in heap (1:requires malloc()). + * Since workplace is rather large, heap mode is recommended. + */ +#ifndef LZ4HC_HEAPMODE +# define LZ4HC_HEAPMODE 1 +#endif + + +/*=== Dependency ===*/ +#define LZ4_HC_STATIC_LINKING_ONLY +#include "lz4hc.h" + + +/*=== Common LZ4 definitions ===*/ +#if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif +#if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +#endif + +#define LZ4_COMMONDEFS_ONLY +#include "lz4.c" /* LZ4_count, constants, mem */ + + +/*=== Constants ===*/ +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) + + +/*=== Macros ===*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? 
(a) : (b) )
+#define HASH_FUNCTION(i)         (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+#define DELTANEXTMAXD(p)         chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent */
+#define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster */
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+
+
+/**************************************
+*  HC Compression
+**************************************/
+static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+{
+    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+    hc4->nextToUpdate = 64 KB;
+    hc4->base = start - 64 KB;
+    hc4->end = start;
+    hc4->dictBase = start - 64 KB;
+    hc4->dictLimit = 64 KB;
+    hc4->lowLimit = 64 KB;
+}
+
+
+/* Update chains up to ip (excluded) */
+LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const hashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = hc4->nextToUpdate;
+
+    while (idx < target) {
+        U32 const h = LZ4HC_hashPtr(base+idx);
+        size_t delta = idx - hashTable[h];
+        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+        DELTANEXTU16(chainTable, idx) = (U16)delta;
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
+}
+
+/** LZ4HC_countBack() :
+ * @return : negative value, nb of common bytes before ip/match */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+                    const BYTE* const iMin, const BYTE* const mMin)
+{
+    int back=0;
+    while ( (ip+back > iMin)
+         && (match+back > mMin)
+         && (ip[back-1] == match[back-1]))
+            back--;
+    return back;
+}
+
+/* LZ4HC_countPattern() :
+ * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
+static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+{
+    const BYTE* const iStart = ip;
+    reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
+
+    while (likely(ip < iEnd-(sizeof(pattern)-1))) {
+        reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+        if (!diff) { ip+=sizeof(pattern); continue; }
+        ip += LZ4_NbCommonBytes(diff);
+        return (unsigned)(ip - iStart);
+    }
+
+    if (LZ4_isLittleEndian()) {
+        reg_t patternByte = pattern;
+        while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+            ip++; patternByte >>= 8;
+        }
+    } else {  /* big endian */
+        U32 bitOffset = (sizeof(pattern)*8) - 8;
+        while (ip < iEnd) {
+            BYTE const byte = (BYTE)(pattern >> bitOffset);
+            if (*ip != byte) break;
+            ip ++; bitOffset -= 8;
+        }
+    }
+
+    return (unsigned)(ip - iStart);
+}
+
+/* LZ4HC_reverseCountPattern() :
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
+ * read using natural platform endianess */ +static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) +{ + const BYTE* const iStart = ip; + + while (likely(ip >= iLow+4)) { + if (LZ4_read32(ip-4) != pattern) break; + ip -= 4; + } + { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */ + while (likely(ip>iLow)) { + if (ip[-1] != *bytePtr) break; + ip--; bytePtr--; + } } + return (unsigned)(iStart - ip); +} + +typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; + +LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( + LZ4HC_CCtx_internal* hc4, + const BYTE* const ip, + const BYTE* const iLowLimit, + const BYTE* const iHighLimit, + int longest, + const BYTE** matchpos, + const BYTE** startpos, + const int maxNbAttempts, + const int patternAnalysis) +{ + U16* const chainTable = hc4->chainTable; + U32* const HashTable = hc4->hashTable; + const BYTE* const base = hc4->base; + const U32 dictLimit = hc4->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE; + const BYTE* const dictBase = hc4->dictBase; + int const delta = (int)(ip-iLowLimit); + int nbAttempts = maxNbAttempts; + U32 const pattern = LZ4_read32(ip); + U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; + + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + /* First Match */ + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG(7, "First match at index %u / %u (lowLimit)", + matchIndex, lowLimit); + + while ((matchIndex>=lowLimit) && (nbAttempts)) { + DEBUGLOG(7, "remaining attempts : %i", nbAttempts); + nbAttempts--; + if (matchIndex >= dictLimit) { + const BYTE* const matchPtr = base + matchIndex; + if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) { + if (LZ4_read32(matchPtr) == pattern) { + int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + #if 0 + /* more generic but unfortunately slower on clang */ + int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr); + #else + int back = 0; + while ( (ip+back > iLowLimit) + && (matchPtr+back > lowPrefixPtr) + && (ip[back-1] == matchPtr[back-1])) { + back--; + } + #endif + mlt -= back; + + if (mlt > longest) { + longest = mlt; + *matchpos = matchPtr+back; + *startpos = ip+back; + } } + } + } else { /* matchIndex < dictLimit */ + const BYTE* const matchPtr = dictBase + matchIndex; + if (LZ4_read32(matchPtr) == pattern) { + int mlt; + int back = 0; + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) + mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); + while ( (ip+back > iLowLimit) + && (matchIndex+back > lowLimit) + && (ip[back-1] == matchPtr[back-1])) + back--; + mlt -= back; + if (mlt > longest) { + longest = mlt; + *matchpos = base + matchIndex + back; + *startpos = ip + back; + } } } + + { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); + matchIndex -= nextOffset; + if (patternAnalysis && nextOffset==1) { + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if ( ((pattern & 0xFFFF) == (pattern >> 16)) + & ((pattern & 0xFF) == (pattern >> 24)) ) { + repeat = rep_confirmed; + srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4; + } else { + repeat = rep_not; 
+ } } + if ( (repeat == rep_confirmed) + && (matchIndex >= dictLimit) ) { /* same segment only */ + const BYTE* const matchPtr = base + matchIndex; + if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ + size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); + const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; + size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); + size_t const currentSegmentLength = backLength + forwardPatternLength; + + if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ + matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + } else { + matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ + } + } } } } + } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */ + + return longest; +} + +LZ4_FORCE_INLINE +int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + const BYTE** matchpos, + const int maxNbAttempts, + const int patternAnalysis) +{ + const BYTE* uselessPtr = ip; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis); +} + + + +typedef enum { + noLimit = 0, + limitedOutput = 1, + limitedDestSize = 2, +} limitedOutput_directive; + +/* LZ4HC_encodeSequence() : + * @return : 0 if ok, + * 1 if buffer issue detected */ +LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( + const BYTE** ip, + BYTE** op, + const BYTE** anchor, + int matchLength, + const BYTE* const match, + limitedOutput_directive limit, + BYTE* oend) +{ + size_t length; + BYTE* const token = (*op)++; + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2) + static const BYTE* start = NULL; + static U32 totalCost = 0; + U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start); + U32 const ll = (U32)(*ip - *anchor); + U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; + U32 const mlAdd = (matchLength>=19) ? 
((matchLength-19) / 255) + 1 : 0;
+    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+    if (start==NULL) start = *anchor;  /* only works for single segment */
+    //g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
+    DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
+                pos,
+                (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
+                cost, totalCost);
+    totalCost += cost;
+#endif
+
+    /* Encode Literal length */
+    length = (size_t)(*ip - *anchor);
+    if ((limit) && ((*op + (length >> 8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length >= RUN_MASK) {
+        size_t len = length - RUN_MASK;
+        *token = (RUN_MASK << ML_BITS);
+        for(; len >= 255 ; len -= 255) *(*op)++ = 255;
+        *(*op)++ = (BYTE)len;
+    } else {
+        *token = (BYTE)(length << ML_BITS);
+    }
+
+    /* Copy Literals */
+    LZ4_wildCopy(*op, *anchor, (*op) + length);
+    *op += length;
+
+    /* Encode Offset */
+    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+
+    /* Encode MatchLength */
+    assert(matchLength >= MINMATCH);
+    length = (size_t)(matchLength - MINMATCH);
+    if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length >= ML_MASK) {
+        *token += ML_MASK;
+        length -= ML_MASK;
+        for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
+        if (length >= 255) { length -= 255; *(*op)++ = 255; }
+        *(*op)++ = (BYTE)length;
+    } else {
+        *token += (BYTE)(length);
+    }
+
+    /* Prepare next loop */
+    *ip += matchLength;
+    *anchor = *ip;
+
+    return 0;
+}
+
+/* btopt */
+#include "lz4opt.h"
+
+
+static int LZ4HC_compress_hashChain (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const source,
+    char* const dest,
+    int* srcSizePtr,
+    int const maxOutputSize,
+    unsigned maxNbAttempts,
+    limitedOutput_directive limit
+    )
+{
+    const int inputSize = *srcSizePtr;
+    const int patternAnalysis = (maxNbAttempts > 64);   /* levels 8+ */
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* optr = (BYTE*) dest;
+    BYTE* op = (BYTE*) dest;
+    BYTE* oend = op + maxOutputSize;
+
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref = NULL;
+    const BYTE* start2 = NULL;
+    const BYTE* ref2 = NULL;
+    const BYTE* start3 = NULL;
+    const BYTE* ref3 = NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+    /* init */
+    *srcSizePtr = 0;
+    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
+    if (inputSize < LZ4_minLength) goto _last_literals;   /* Input too small, no compression (all literals) */
+
+    /* Main Loop */
+    while (ip < mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
+        if (ml<MINMATCH) { ip++; continue; }
+
+        /* saved, in case we would skip too much */
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+                            maxNbAttempts, patternAnalysis);
+        else
+            ml2 = ml;
+
+        if (ml2 == ml) { /* No better match */
+            optr = op;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
+            continue;
+        }
+
+        if (start0 < ip) {
+            if (start2 < ip + ml0) {  /* empirical */
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3) {  /* First Match too small : removed */
+            ml = ml2;
+            ip = start2;
+            ref = ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        /* At this stage, we have :
+        *  ml2 > ml1, and
+        *  ip1+3 <= ip2 (usually < ip1+ml1) */
+        if ((start2 - ip) < OPTIMAL_ML) {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0) {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+                            maxNbAttempts, patternAnalysis);
+        else
+            ml3 = ml2;
+
+        if (ml3 == ml2) {  /* No better match : 2 sequences to encode */
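+            /* editor's note: the wider-match probe above starts at start2+ml2-3, so any strictly better overlapping match would have returned ml3 > ml2 */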
+ /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) ml = (int)(start2 - ip); + /* Now, encode 2 sequences */ + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; + ip = start2; + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) goto _dest_overflow; + continue; + } + + if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ + if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + if (start2 < ip+ml) { + int correction = (int)(ip+ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _Search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _Search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least the first one + * ip & ref are known; Now for ml + */ + if (start2 < ip+ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else { + ml = (int)(start2 - ip); + } + } + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _Search3; + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + litLength + lastRunSize; + if (limit == limitedDestSize) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; /* Check output limit */ + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1; + litLength = (lastRunSize + 255 - RUN_MASK) / 255; + lastRunSize -= litLength; + } + ip = anchor + lastRunSize; + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + return (int) (((char*)op)-dest); + +_dest_overflow: + if (limit == limitedDestSize) { + op = optr; /* restore correct out pointer */ + goto _last_literals; + } + return 0; +} + + +static int LZ4HC_compress_generic ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + typedef enum { lz4hc, lz4opt } lz4hc_strat_e; + typedef struct { + lz4hc_strat_e strat; + U32 nbSearches; + U32 targetLength; + } cParams_t; + static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = { + { lz4hc, 2, 16 }, /* 0, unused */ + { lz4hc, 2, 16 }, /* 1, unused */ + { lz4hc, 2, 16 }, /* 2, unused */ + { lz4hc, 4, 16 }, /* 3 */ + { lz4hc, 8, 16 }, /* 4 */ + { lz4hc, 16, 16 }, /* 5 */ + { 
lz4hc, 32, 16 }, /* 6 */ + { lz4hc, 64, 16 }, /* 7 */ + { lz4hc, 128, 16 }, /* 8 */ + { lz4hc, 256, 16 }, /* 9 */ + { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ + { lz4opt, 512,128 }, /*11 */ + { lz4opt,8192, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ + }; + + if (limit == limitedDestSize && dstCapacity < 1) return 0; /* Impossible to store anything */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ + + ctx->end += *srcSizePtr; + if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ + cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); + assert(cLevel >= 0); + assert(cLevel <= LZ4HC_CLEVEL_MAX); + { cParams_t const cParam = clTable[cLevel]; + if (cParam.strat == lz4hc) + return LZ4HC_compress_hashChain(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, limit); + assert(cParam.strat == lz4opt); + return LZ4HC_compress_optimal(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, cParam.targetLength, limit, + cLevel == LZ4HC_CLEVEL_MAX); /* ultra mode */ + } +} + + +int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); } + +int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; + if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ + LZ4HC_init (ctx, (const BYTE*)src); + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); + else + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, noLimit); +} + +int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); +#else + LZ4_streamHC_t state; + LZ4_streamHC_t* const statePtr = &state; +#endif + int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + free(statePtr); +#endif + return cSize; +} + +/* LZ4_compress_HC_destSize() : + * only compatible with regular HC parser */ +int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) +{ + LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse; + LZ4HC_init(ctx, (const BYTE*) source); + return LZ4HC_compress_generic(ctx, source, dest, sourceSizePtr, targetDestSize, cLevel, limitedDestSize); +} + + + +/************************************** +* Streaming Functions +**************************************/ +/* allocation */ +LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); } +int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { + if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ + free(LZ4_streamHCPtr); + return 0; +} + + +/* initialization */ +void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ + LZ4_streamHCPtr->internal_donotuse.base = NULL; + 
LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + if (compressionLevel < 1) compressionLevel = 1; + if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; + LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel; +} + +int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + if (dictSize > 64 KB) { + dictionary += dictSize - 64 KB; + dictSize = 64 KB; + } + LZ4HC_init (ctxPtr, (const BYTE*)dictionary); + ctxPtr->end = (const BYTE*)dictionary + dictSize; + if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); + return dictSize; +} + + +/* compression */ + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) +{ + if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ + + /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base); + ctxPtr->dictBase = ctxPtr->base; + ctxPtr->base = newBlock - ctxPtr->dictLimit; + ctxPtr->end = newBlock; + ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ +} + +static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int dstCapacity, + limitedOutput_directive limit) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + /* auto-init if forgotten */ + if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) src); + + /* Check overflow */ + if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) { + size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit; + if (dictSize > 64 KB) dictSize = 64 KB; + LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); + } + + /* Check if blocks follow each other */ + if ((const BYTE*)src != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; + const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit; + const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit; + if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { + if (sourceEnd > dictEnd) sourceEnd = dictEnd; + ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); + if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit; + } + } + + return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); +} + +int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) +{ + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); + else + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, noLimit); +} + +int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) +{ + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize); +} + + + +/* dictionary saving */ + +int LZ4_saveDictHC 
(LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) +{ + LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; + int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit)); + if (dictSize > 64 KB) dictSize = 64 KB; + if (dictSize < 4) dictSize = 0; + if (dictSize > prefixSize) dictSize = prefixSize; + memmove(safeBuffer, streamPtr->end - dictSize, dictSize); + { U32 const endIndex = (U32)(streamPtr->end - streamPtr->base); + streamPtr->end = (const BYTE*)safeBuffer + dictSize; + streamPtr->base = streamPtr->end - endIndex; + streamPtr->dictLimit = endIndex - dictSize; + streamPtr->lowLimit = endIndex - dictSize; + if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit; + } + return dictSize; +} + + +/*********************************** +* Deprecated Functions +***********************************/ +/* These functions currently generate deprecation warnings */ +/* Deprecated compression functions */ +int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } +int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } + + +/* Deprecated streaming functions */ +int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; } + +int LZ4_resetStreamStateHC(void* state, char* inputBuffer) +{ + LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; + if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */ + LZ4HC_init(ctx, (const BYTE*)inputBuffer); + ctx->inputBuffer = (BYTE*)inputBuffer; + return 0; +} + +void* LZ4_createHC (char* inputBuffer) +{ + LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t)); + if (hc4 == NULL) return NULL; /* not enough memory */ + LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + 
hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer; + return hc4; +} + +int LZ4_freeHC (void* LZ4HC_Data) { + if (!LZ4HC_Data) return 0; /* support free on NULL */ + FREEMEM(LZ4HC_Data); + return 0; +} + +int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, noLimit); +} + +int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); +} + +char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +{ + LZ4HC_CCtx_internal* const hc4 = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse; + int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB); + return (char*)(hc4->inputBuffer + dictSize); +} diff --git a/c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.h b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.h new file mode 100644 index 0000000..d41bf42 --- /dev/null +++ b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4hc.h @@ -0,0 +1,272 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Header File + Copyright (C) 2011-2017, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef LZ4_HC_H_19834876238432 +#define LZ4_HC_H_19834876238432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* --- Dependency --- */ +/* note : lz4hc requires lz4.h/lz4.c for compilation */ +#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ + + +/* --- Useful constants --- */ +#define LZ4HC_CLEVEL_MIN 3 +#define LZ4HC_CLEVEL_DEFAULT 9 +#define LZ4HC_CLEVEL_OPT_MIN 10 +#define LZ4HC_CLEVEL_MAX 12 + + +/*-************************************ + * Block Compression + **************************************/ +/*! LZ4_compress_HC() : + * Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm. + * `dst` must be already allocated. 
+ * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") + * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") + * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work. + * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. + * @return : the number of bytes written into 'dst' + * or 0 if compression fails. + */ +LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); + + +/* Note : + * Decompression functions are provided within "lz4.h" (BSD license) + */ + + +/*! LZ4_compress_HC_extStateHC() : + * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. + * `state` size is provided by LZ4_sizeofStateHC(). + * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). + */ +LZ4LIB_API int LZ4_sizeofStateHC(void); +LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); + + +/*-************************************ + * Streaming Compression + * Bufferless synchronous API + **************************************/ + typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ + +/*! LZ4_createStreamHC() and LZ4_freeStreamHC() : + * These functions create and release memory for LZ4 HC streaming state. + * Newly created states are automatically initialized. + * Existing states can be re-used several times, using LZ4_resetStreamHC(). + * These methods are API and ABI stable, they can be used in combination with a DLL. + */ +LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); +LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); + +LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel); +LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); + +LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize); + +LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); + +/* + These functions compress data in successive blocks of any size, using previous blocks as dictionary. + One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks. + There is an exception for ring buffers, which can be smaller than 64 KB. + Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue(). + + Before starting compression, state must be properly initialized, using LZ4_resetStreamHC(). + A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional). + + Then, use LZ4_compress_HC_continue() to compress each successive block. + Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression. + 'dst' buffer should be sized to handle worst case scenarios (see LZ4_compressBound()), to ensure operation success. + Because in case of failure, the API does not guarantee context recovery, and context will have to be reset. + If `dst` buffer budget cannot be >= LZ4_compressBound(), consider using LZ4_compress_HC_continue_destSize() instead. + + If, for any reason, previous data block can't be preserved unmodified in memory for next compression block, + you can save it to a more stable memory space, using LZ4_saveDictHC(). 
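+
+  Illustrative sketch of the block loop (editor's addition, not upstream text;
+  BLOCK_SIZE, next_block(), write_output() and safeBuffer are hypothetical) :
+    LZ4_streamHC_t hcs;
+    char dst[LZ4_COMPRESSBOUND(BLOCK_SIZE)];
+    LZ4_resetStreamHC(&hcs, LZ4HC_CLEVEL_DEFAULT);
+    for (const char* block = next_block(); block != NULL; block = next_block()) {
+        int const cSize = LZ4_compress_HC_continue(&hcs, block, dst, BLOCK_SIZE, (int)sizeof(dst));
+        if (cSize == 0) break;      /* failure : reset the context before re-using it */
+        write_output(dst, cSize);   /* 'block' must stay readable while later blocks are compressed */
+    }
+    int const dictSize = LZ4_saveDictHC(&hcs, safeBuffer, 65536 /* 64 KB */);
+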
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
+*/
+
+
+/*-**************************************************************
+ * PRIVATE DEFINITIONS :
+ * Do not use these definitions.
+ * They are exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
+ ****************************************************************/
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG 15
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#include <stdint.h>
+
+typedef struct
+{
+    uint32_t   hashTable[LZ4HC_HASHTABLESIZE];
+    uint16_t   chainTable[LZ4HC_MAXD];
+    const uint8_t* end;         /* next block here to continue on current prefix */
+    const uint8_t* base;        /* All index relative to this position */
+    const uint8_t* dictBase;    /* alternate base for extDict */
+    uint8_t* inputBuffer;       /* deprecated */
+    uint32_t   dictLimit;       /* below that point, need extDict */
+    uint32_t   lowLimit;        /* below that point, no more dict */
+    uint32_t   nextToUpdate;    /* index from which to continue dictionary update */
+    int        compressionLevel;
+} LZ4HC_CCtx_internal;
+
+#else
+
+typedef struct
+{
+    unsigned int   hashTable[LZ4HC_HASHTABLESIZE];
+    unsigned short chainTable[LZ4HC_MAXD];
+    const unsigned char* end;        /* next block here to continue on current prefix */
+    const unsigned char* base;       /* All index relative to this position */
+    const unsigned char* dictBase;   /* alternate base for extDict */
+    unsigned char* inputBuffer;      /* deprecated */
+    unsigned int   dictLimit;        /* below that point, need extDict */
+    unsigned int   lowLimit;         /* below that point, no more dict */
+    unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
+    int            compressionLevel;
+} LZ4HC_CCtx_internal;
+
+#endif
+
+#define LZ4_STREAMHCSIZE        (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 262200 */
+#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+union LZ4_streamHC_u {
+    size_t table[LZ4_STREAMHCSIZE_SIZET];
+    LZ4HC_CCtx_internal internal_donotuse;
+};   /* previously typedef'd to LZ4_streamHC_t */
+/*
+  LZ4_streamHC_t :
+  This structure allows static allocation of LZ4 HC streaming state.
+  State must be initialized using LZ4_resetStreamHC() before first use.
+
+  Static allocation shall only be used in combination with static linking.
+  When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
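+
+  Example of static allocation (editor's illustration, not upstream text) :
+    static LZ4_streamHC_t hcState;                        /* static linking only */
+    LZ4_resetStreamHC(&hcState, LZ4HC_CLEVEL_DEFAULT);    /* mandatory before first use */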
+*/ + + +/*-************************************ +* Deprecated Functions +**************************************/ +/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */ + +/* deprecated compression functions */ +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC (const char* source, char* dest, int inputSize); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/* Deprecated Streaming functions using older model; should no longer be used */ +LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDictHC() instead") char* LZ4_slideInputBufferHC (void* LZ4HC_Data); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") int LZ4_freeHC (void* LZ4HC_Data); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int LZ4_sizeofStreamStateHC(void); +LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStreamStateHC(void* state, char* inputBuffer); + + +#if defined (__cplusplus) +} +#endif + +#endif /* LZ4_HC_H_19834876238432 */ + + +/*-************************************************** + * !!!!! STATIC LINKING ONLY !!!!! + * Following definitions are considered experimental. + * They should not be linked from DLL, + * as there is no guarantee of API stability yet. + * Prototypes will be promoted to "stable" status + * after successfull usage in real-life scenarios. 
+ ***************************************************/ +#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */ +#ifndef LZ4_HC_SLO_098092834 +#define LZ4_HC_SLO_098092834 + +/*! LZ4_compress_HC_destSize() : v1.8.0 (experimental) + * Will try to compress as much data from `src` as possible + * that can fit into `targetDstSize` budget. + * Result is provided in 2 parts : + * @return : the number of bytes written into 'dst' + * or 0 if compression fails. + * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src` + */ +int LZ4_compress_HC_destSize(void* LZ4HC_Data, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize, + int compressionLevel); + +/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental) + * Similar as LZ4_compress_HC_continue(), + * but will read a variable nb of bytes from `src` + * to fit into `targetDstSize` budget. + * Result is provided in 2 parts : + * @return : the number of bytes written into 'dst' + * or 0 if compression fails. + * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src`. + */ +int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize); + +/*! LZ4_setCompressionLevel() : v1.8.0 (experimental) + * It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*() + */ +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); + + + +#endif /* LZ4_HC_SLO_098092834 */ +#endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/c-blosc/internal-complibs/lz4-1.8.1.2/lz4opt.h b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4opt.h new file mode 100644 index 0000000..5a8438c --- /dev/null +++ b/c-blosc/internal-complibs/lz4-1.8.1.2/lz4opt.h @@ -0,0 +1,356 @@ +/* + lz4opt.h - Optimal Mode of LZ4 + Copyright (C) 2015-2017, Przemyslaw Skibinski + Note : this file is intended to be included within lz4hc.c + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +#define LZ4_OPT_NUM (1<<12) + +typedef struct { + int price; + int off; + int mlen; + int litlen; +} LZ4HC_optimal_t; + + +/* price in bytes */ +LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) +{ + int price = litlen; + if (litlen >= (int)RUN_MASK) + price += 1 + (litlen-RUN_MASK)/255; + return price; +} + + +/* requires mlen >= MINMATCH */ +LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) +{ + int price = 1 + 2 ; /* token + 16-bit offset */ + + price += LZ4HC_literalsPrice(litlen); + + if (mlen >= (int)(ML_MASK+MINMATCH)) + price += 1 + (mlen-(ML_MASK+MINMATCH))/255; + + return price; +} + + +/*-************************************* +* Match finder +***************************************/ +typedef struct { + int off; + int len; +} LZ4HC_match_t; + +LZ4_FORCE_INLINE +LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, + const BYTE* ip, const BYTE* const iHighLimit, + int minLen, int nbSearches) +{ + LZ4HC_match_t match = { 0 , 0 }; + const BYTE* matchPtr = NULL; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, + ip, ip, iHighLimit, minLen, &matchPtr, &ip, + nbSearches, 1 /* patternAnalysis */); + if (matchLength <= minLen) return match; + match.len = matchLength; + match.off = (int)(ip-matchPtr); + return match; +} + + +static int LZ4HC_compress_optimal ( + LZ4HC_CCtx_internal* ctx, + const char* const source, + char* dst, + int* srcSizePtr, + int dstCapacity, + int const nbSearches, + size_t sufficient_len, + limitedOutput_directive limit, + int const fullUpdate + ) +{ +#define TRAILING_LITERALS 3 + LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* this uses a bit too much stack memory to my taste ... 
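+       (editor's note: LZ4_OPT_NUM = 1<<12 entries of sizeof(LZ4HC_optimal_t) = 16 bytes each, so opt[] occupies roughly 64 KB of stack)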
*/ + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + *srcSizePtr; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + BYTE* op = (BYTE*) dst; + BYTE* opSaved = (BYTE*) dst; + BYTE* oend = op + dstCapacity; + + /* init */ + DEBUGLOG(5, "LZ4HC_compress_optimal"); + *srcSizePtr = 0; + if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; + + /* Main Loop */ + assert(ip - anchor < LZ4_MAX_INPUT_SIZE); + while (ip < mflimit) { + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; + + LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches); + if (firstMatch.len==0) { ip++; continue; } + + if ((size_t)firstMatch.len > sufficient_len) { + /* good enough solution : immediate encoding */ + int const firstML = firstMatch.len; + const BYTE* const matchPos = ip - firstMatch.off; + opSaved = op; + if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ + goto _dest_overflow; + continue; + } + + /* set prices for first positions (literals) */ + { int rPos; + for (rPos = 0 ; rPos < MINMATCH ; rPos++) { + int const cost = LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = llen + rPos; + opt[rPos].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); + } } + /* set prices using initial match */ + { int mlen = MINMATCH; + int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + assert(matchML < LZ4_OPT_NUM); + for ( ; mlen <= matchML ; mlen++) { + int const cost = LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } } + last_match_pos = firstMatch.len; + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + + /* check further positions */ + for (cur = 1; cur < last_match_pos; cur++) { + const BYTE* const curPtr = ip + cur; + LZ4HC_match_t newMatch; + + if (curPtr >= mflimit) break; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", + cur, opt[cur].price, opt[cur+1].price, cur+1); + if (fullUpdate) { + /* not useful to search here if next position has same (or lower) cost */ + if ( (opt[cur+1].price <= opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) + continue; + } else { + /* not useful to search here if next position has same (or lower) cost */ + if (opt[cur+1].price <= opt[cur].price) continue; + } + + DEBUGLOG(7, "search at rPos:%u", cur); + if (fullUpdate) + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches); + else + /* only test matches 
of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches); + if (!newMatch.len) continue; + + if ( ((size_t)newMatch.len > sufficient_len) + || (newMatch.len + cur >= LZ4_OPT_NUM) ) { + /* immediate encoding */ + best_mlen = newMatch.len; + best_off = newMatch.off; + last_match_pos = cur + 1; + goto encode; + } + + /* before match : set price with literals at beginning */ + { int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + opt[pos].litlen = baseLitlen+litlen; + opt[pos].price = price; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", + pos, price, opt[pos].litlen); + } } } + + /* set prices using match at position = cur */ + { int const matchML = newMatch.len; + int ml = MINMATCH; + + assert(cur + newMatch.len < LZ4_OPT_NUM); + for ( ; ml <= matchML ; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } + + if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ( (ml == matchML) /* last pos of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } } } + /* complete following positions with literals */ + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ + + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; + +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG(6, "reverse traversal, looking for shortest path") + DEBUGLOG(6, "last_match_pos = %i", last_match_pos); + { int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ + int const next_offset = opt[candidate_pos].off; + DEBUGLOG(6, "pos %i: sequence length %i", candidate_pos, selected_matchLength); + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off = selected_offset; + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ + 
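+                /* editor's note: step back by the selected sequence's length to reach the previous sequence boundary */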
+                candidate_pos -= next_matchLength;
+        }   }
+
+        /* encode all recorded sequences in order */
+        {   int rPos = 0;  /* relative position (to ip) */
+            while (rPos < last_match_pos) {
+                int const ml = opt[rPos].mlen;
+                int const offset = opt[rPos].off;
+                if (ml == 1) { ip++; rPos++; continue; }  /* literal; note: can end up with several literals, in which case, skip them */
+                rPos += ml;
+                assert(ml >= MINMATCH);
+                assert((offset >= 1) && (offset <= MAX_DISTANCE));
+                opSaved = op;
+                if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
+                    goto _dest_overflow;
+        }   }
+    }  /* while (ip < mflimit) */
+
+_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+        size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+        size_t const totalSize = 1 + litLength + lastRunSize;
+        if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+        if (limit && (op + totalSize > oend)) {
+            if (limit == limitedOutput) return 0;  /* Check output limit */
+            /* adapt lastRunSize to fill 'dst' */
+            lastRunSize = (size_t)(oend - op) - 1;
+            litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+            lastRunSize -= litLength;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = (RUN_MASK << ML_BITS);
+            for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize << ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip) - source);
+    return (int) ((char*)op-dst);
+
+_dest_overflow:
+    if (limit == limitedDestSize) {
+        op = opSaved;  /* restore correct out pointer */
+        goto _last_literals;
+    }
+    return 0;
+}
diff --git a/c-blosc/internal-complibs/snappy-1.1.1/add-version.patch b/c-blosc/internal-complibs/snappy-1.1.1/add-version.patch
new file mode 100644
index 0000000..d9b9873
--- /dev/null
+++ b/c-blosc/internal-complibs/snappy-1.1.1/add-version.patch
@@ -0,0 +1,19 @@
+diff --git a/internal-complibs/snappy-1.1.1/snappy-c.h b/internal-complibs/snappy-1.1.1/snappy-c.h
+index c6c2a86..eabe3ae 100644
+--- a/internal-complibs/snappy-1.1.1/snappy-c.h
++++ b/internal-complibs/snappy-1.1.1/snappy-c.h
+@@ -37,6 +37,14 @@
+ extern "C" {
+ #endif
+ 
++// The next is for getting the Snappy version even if used the C API
++// Please note that this is only defined in the Blosc sources of Snappy.
++#define SNAPPY_MAJOR 1
++#define SNAPPY_MINOR 1
++#define SNAPPY_PATCHLEVEL 1
++#define SNAPPY_VERSION \
++    ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
++
+ #include <stddef.h>
+ 
+ /*
diff --git a/c-blosc/internal-complibs/snappy-1.1.1/msvc1.patch b/c-blosc/internal-complibs/snappy-1.1.1/msvc1.patch
new file mode 100644
index 0000000..21f0aaa
--- /dev/null
+++ b/c-blosc/internal-complibs/snappy-1.1.1/msvc1.patch
@@ -0,0 +1,17 @@
+--- a/internal-complibs/snappy-1.1.1/snappy.h
++++ b/internal-complibs/snappy-1.1.1/snappy.h
+@@ -44,6 +44,14 @@
+ 
+ #include "snappy-stubs-public.h"
+ 
++// Windows does not define ssize_t by default. This is a workaround.
++// Please note that this is only defined in the Blosc sources of Snappy.
++#if defined(_WIN32) && !defined(__MINGW32__)
++#include <BaseTsd.h>
++typedef SSIZE_T ssize_t;
++#endif
++
++
+ namespace snappy {
+   class Source;
+   class Sink;
diff --git a/c-blosc/internal-complibs/snappy-1.1.1/msvc2.patch b/c-blosc/internal-complibs/snappy-1.1.1/msvc2.patch
new file mode 100644
index 0000000..ccface4
--- /dev/null
+++ b/c-blosc/internal-complibs/snappy-1.1.1/msvc2.patch
@@ -0,0 +1,27 @@
+diff --git a/internal-complibs/snappy-1.1.1/snappy-stubs-public.h b/internal-complibs/snappy-1.1.1/snappy-stubs-public.h
+index ecda439..4cc8965 100644
+--- a/internal-complibs/snappy-1.1.1/snappy-stubs-public.h
++++ b/internal-complibs/snappy-1.1.1/snappy-stubs-public.h
+@@ -36,8 +36,21 @@
+ #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+ 
+-#if 1
++// MSVC 2008 does not include stdint.h. This is a workaround by Mark W.
++// Please note that this is only defined in the Blosc sources of Snappy.
++#if !defined(_MSC_VER) || _MSC_VER >= 1600
+ #include <stdint.h>
++#else
++typedef signed char int8_t;
++typedef short int16_t;
++typedef int int32_t;
++typedef __int64 int64_t;
++typedef ptrdiff_t intptr_t;
++typedef unsigned char uint8_t;
++typedef unsigned short uint16_t;
++typedef unsigned int uint32_t;
++typedef unsigned __int64 uint64_t;
++typedef size_t uintptr_t;
+ #endif
+ 
+ #if 1
diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-c.cc b/c-blosc/internal-complibs/snappy-1.1.1/snappy-c.cc
new file mode 100644
index 0000000..473a0b0
--- /dev/null
+++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-c.cc
@@ -0,0 +1,90 @@
+// Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +#include "snappy.h" +#include "snappy-c.h" + +extern "C" { + +snappy_status snappy_compress(const char* input, + size_t input_length, + char* compressed, + size_t *compressed_length) { + if (*compressed_length < snappy_max_compressed_length(input_length)) { + return SNAPPY_BUFFER_TOO_SMALL; + } + snappy::RawCompress(input, input_length, compressed, compressed_length); + return SNAPPY_OK; +} + +snappy_status snappy_uncompress(const char* compressed, + size_t compressed_length, + char* uncompressed, + size_t* uncompressed_length) { + size_t real_uncompressed_length; + if (!snappy::GetUncompressedLength(compressed, + compressed_length, + &real_uncompressed_length)) { + return SNAPPY_INVALID_INPUT; + } + if (*uncompressed_length < real_uncompressed_length) { + return SNAPPY_BUFFER_TOO_SMALL; + } + if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) { + return SNAPPY_INVALID_INPUT; + } + *uncompressed_length = real_uncompressed_length; + return SNAPPY_OK; +} + +size_t snappy_max_compressed_length(size_t source_length) { + return snappy::MaxCompressedLength(source_length); +} + +snappy_status snappy_uncompressed_length(const char *compressed, + size_t compressed_length, + size_t *result) { + if (snappy::GetUncompressedLength(compressed, + compressed_length, + result)) { + return SNAPPY_OK; + } else { + return SNAPPY_INVALID_INPUT; + } +} + +snappy_status snappy_validate_compressed_buffer(const char *compressed, + size_t compressed_length) { + if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) { + return SNAPPY_OK; + } else { + return SNAPPY_INVALID_INPUT; + } +} + +} // extern "C" diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-c.h b/c-blosc/internal-complibs/snappy-1.1.1/snappy-c.h new file mode 100644 index 0000000..e463fd4 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-c.h @@ -0,0 +1,146 @@ +/* + * Copyright 2011 Martin Gieseking . + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Plain C interface (a wrapper around the C++ implementation). 
+ */
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The next is for getting the Snappy version even if used the C API.
+// Please note that this is only defined in the Blosc sources of Snappy.
+#define SNAPPY_MAJOR 1
+#define SNAPPY_MINOR 1
+#define SNAPPY_PATCHLEVEL 1
+#define SNAPPY_VERSION \
+    ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
+
+#include <stddef.h>
+
+/*
+ * Return values; see the documentation for each function to know
+ * what each can return.
+ */
+typedef enum {
+  SNAPPY_OK = 0,
+  SNAPPY_INVALID_INPUT = 1,
+  SNAPPY_BUFFER_TOO_SMALL = 2
+} snappy_status;
+
+/*
+ * Takes the data stored in "input[0..input_length-1]" and stores
+ * it in the array pointed to by "compressed".
+ *
+ * <compressed_length> signals the space available in "compressed".
+ * If it is not at least equal to "snappy_max_compressed_length(input_length)",
+ * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression,
+ * <compressed_length> contains the true length of the compressed output,
+ * and SNAPPY_OK is returned.
+ *
+ * Example:
+ *   size_t output_length = snappy_max_compressed_length(input_length);
+ *   char* output = (char*)malloc(output_length);
+ *   if (snappy_compress(input, input_length, output, &output_length)
+ *       == SNAPPY_OK) {
+ *     ... Process(output, output_length) ...
+ *   }
+ *   free(output);
+ */
+snappy_status snappy_compress(const char* input,
+                              size_t input_length,
+                              char* compressed,
+                              size_t* compressed_length);
+
+/*
+ * Given data in "compressed[0..compressed_length-1]" generated by
+ * calling the snappy_compress routine, this routine stores
+ * the uncompressed data to
+ *   uncompressed[0..uncompressed_length-1].
+ * Returns failure (a value not equal to SNAPPY_OK) if the message
+ * is corrupted and could not be decrypted.
+ *
+ * <uncompressed_length> signals the space available in "uncompressed".
+ * If it is not at least equal to the value returned by
+ * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL
+ * is returned. After successful decompression,
+ * <uncompressed_length> contains the true length of the decompressed output.
+ *
+ * Example:
+ *   size_t output_length;
+ *   if (snappy_uncompressed_length(input, input_length, &output_length)
+ *       != SNAPPY_OK) {
+ *     ... fail ...
+ *   }
+ *   char* output = (char*)malloc(output_length);
+ *   if (snappy_uncompress(input, input_length, output, &output_length)
+ *       == SNAPPY_OK) {
+ *     ... Process(output, output_length) ...
+ *   }
+ *   free(output);
+ */
+snappy_status snappy_uncompress(const char* compressed,
+                                size_t compressed_length,
+                                char* uncompressed,
+                                size_t* uncompressed_length);
+
+/*
+ * Returns the maximal size of the compressed representation of
+ * input data that is "source_length" bytes in length.
+ */
+size_t snappy_max_compressed_length(size_t source_length);
+
+/*
+ * REQUIRES: "compressed[]" was produced by snappy_compress()
+ * Returns SNAPPY_OK and stores the length of the uncompressed data in
+ * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error.
+ * This operation takes O(1) time.
+ */
+snappy_status snappy_uncompressed_length(const char* compressed,
+                                         size_t compressed_length,
+                                         size_t* result);
+
+/*
+ * Check if the contents of "compressed[]" can be uncompressed successfully.
+ * Does not return the uncompressed data; if so, returns SNAPPY_OK,
+ * or if not, returns SNAPPY_INVALID_INPUT.
+ * Takes time proportional to compressed_length, but is usually at least a
+ * factor of four faster than actual decompression.
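+ *
+ * Example (editor's illustration, not part of the upstream header):
+ *   if (snappy_validate_compressed_buffer(input, input_length) == SNAPPY_OK) {
+ *     ... safe to pass "input" to snappy_uncompress ...
+ *   }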
+ */ +snappy_status snappy_validate_compressed_buffer(const char* compressed, + size_t compressed_length); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */ diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-internal.h b/c-blosc/internal-complibs/snappy-1.1.1/snappy-internal.h new file mode 100644 index 0000000..c99d331 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-internal.h @@ -0,0 +1,150 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Internals shared between the Snappy implementation and its unittest. + +#ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_ +#define UTIL_SNAPPY_SNAPPY_INTERNAL_H_ + +#include "snappy-stubs-internal.h" + +namespace snappy { +namespace internal { + +class WorkingMemory { + public: + WorkingMemory() : large_table_(NULL) { } + ~WorkingMemory() { delete[] large_table_; } + + // Allocates and clears a hash table using memory in "*this", + // stores the number of buckets in "*table_size" and returns a pointer to + // the base of the hash table. + uint16* GetHashTable(size_t input_size, int* table_size); + + private: + uint16 small_table_[1<<10]; // 2KB + uint16* large_table_; // Allocated only when needed + + DISALLOW_COPY_AND_ASSIGN(WorkingMemory); +}; + +// Flat array compression that does not emit the "uncompressed length" +// prefix. Compresses "input" string to the "*op" buffer. +// +// REQUIRES: "input_length <= kBlockSize" +// REQUIRES: "op" points to an array of memory that is at least +// "MaxCompressedLength(input_length)" in size. +// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". +char* CompressFragment(const char* input, + size_t input_length, + char* op, + uint16* table, + const int table_size); + +// Return the largest n such that +// +// s1[0,n-1] == s2[0,n-1] +// and n <= (s2_limit - s2). 
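+// For instance (editor's illustration): s1 = "abcd1", s2 = "abcd2" yields n = 4.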
+// +// Does not read *s2_limit or beyond. +// Does not read *(s1 + (s2_limit - s2)) or beyond. +// Requires that s2_limit >= s2. +// +// Separate implementation for x86_64, for speed. Uses the fact that +// x86_64 is little endian. +#if defined(ARCH_K8) +static inline int FindMatchLength(const char* s1, + const char* s2, + const char* s2_limit) { + assert(s2_limit >= s2); + int matched = 0; + + // Find out how long the match is. We loop over the data 64 bits at a + // time until we find a 64-bit block that doesn't match; then we find + // the first non-matching bit and use that to calculate the total + // length of the match. + while (PREDICT_TRUE(s2 <= s2_limit - 8)) { + if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) { + s2 += 8; + matched += 8; + } else { + // On current (mid-2008) Opteron models there is a 3% more + // efficient code sequence to find the first non-matching byte. + // However, what follows is ~10% better on Intel Core 2 and newer, + // and we expect AMD's bsf instruction to improve. + uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + return matched; + } + } + while (PREDICT_TRUE(s2 < s2_limit)) { + if (PREDICT_TRUE(s1[matched] == *s2)) { + ++s2; + ++matched; + } else { + return matched; + } + } + return matched; +} +#else +static inline int FindMatchLength(const char* s1, + const char* s2, + const char* s2_limit) { + // Implementation based on the x86-64 version, above. + assert(s2_limit >= s2); + int matched = 0; + + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } + if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { + uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } + return matched; +} +#endif + +} // end namespace internal +} // end namespace snappy + +#endif // UTIL_SNAPPY_SNAPPY_INTERNAL_H_ diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.cc b/c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.cc new file mode 100644 index 0000000..5844552 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.cc @@ -0,0 +1,71 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include "snappy-sinksource.h" + +namespace snappy { + +Source::~Source() { } + +Sink::~Sink() { } + +char* Sink::GetAppendBuffer(size_t length, char* scratch) { + return scratch; +} + +ByteArraySource::~ByteArraySource() { } + +size_t ByteArraySource::Available() const { return left_; } + +const char* ByteArraySource::Peek(size_t* len) { + *len = left_; + return ptr_; +} + +void ByteArraySource::Skip(size_t n) { + left_ -= n; + ptr_ += n; +} + +UncheckedByteArraySink::~UncheckedByteArraySink() { } + +void UncheckedByteArraySink::Append(const char* data, size_t n) { + // Do no copying if the caller filled in the result of GetAppendBuffer() + if (data != dest_) { + memcpy(dest_, data, n); + } + dest_ += n; +} + +char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { + return dest_; +} + +} diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.h b/c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.h new file mode 100644 index 0000000..faabfa1 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-sinksource.h @@ -0,0 +1,137 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_ +#define UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_ + +#include + + +namespace snappy { + +// A Sink is an interface that consumes a sequence of bytes. +class Sink { + public: + Sink() { } + virtual ~Sink(); + + // Append "bytes[0,n-1]" to this. 
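+  // (Editor's sketch of a minimal hypothetical Sink, to illustrate the
+  // contract of the pure virtual Append() declared below; it is not part
+  // of the library:
+  //
+  //   class CountingSink : public Sink {
+  //    public:
+  //     CountingSink() : total_(0) { }
+  //     virtual void Append(const char* bytes, size_t n) { total_ += n; }
+  //     size_t total() const { return total_; }
+  //    private:
+  //     size_t total_;
+  //   };
+  // )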
+ virtual void Append(const char* bytes, size_t n) = 0; + + // Returns a writable buffer of the specified length for appending. + // May return a pointer to the caller-owned scratch buffer which + // must have at least the indicated length. The returned buffer is + // only valid until the next operation on this Sink. + // + // After writing at most "length" bytes, call Append() with the + // pointer returned from this function and the number of bytes + // written. Many Append() implementations will avoid copying + // bytes if this function returned an internal buffer. + // + // If a non-scratch buffer is returned, the caller may only pass a + // prefix of it to Append(). That is, it is not correct to pass an + // interior pointer of the returned array to Append(). + // + // The default implementation always returns the scratch buffer. + virtual char* GetAppendBuffer(size_t length, char* scratch); + + + private: + // No copying + Sink(const Sink&); + void operator=(const Sink&); +}; + +// A Source is an interface that yields a sequence of bytes +class Source { + public: + Source() { } + virtual ~Source(); + + // Return the number of bytes left to read from the source + virtual size_t Available() const = 0; + + // Peek at the next flat region of the source. Does not reposition + // the source. The returned region is empty iff Available()==0. + // + // Returns a pointer to the beginning of the region and store its + // length in *len. + // + // The returned region is valid until the next call to Skip() or + // until this object is destroyed, whichever occurs first. + // + // The returned region may be larger than Available() (for example + // if this ByteSource is a view on a substring of a larger source). + // The caller is responsible for ensuring that it only reads the + // Available() bytes. + virtual const char* Peek(size_t* len) = 0; + + // Skip the next n bytes. Invalidates any buffer returned by + // a previous call to Peek(). + // REQUIRES: Available() >= n + virtual void Skip(size_t n) = 0; + + private: + // No copying + Source(const Source&); + void operator=(const Source&); +}; + +// A Source implementation that yields the contents of a flat array +class ByteArraySource : public Source { + public: + ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } + virtual ~ByteArraySource(); + virtual size_t Available() const; + virtual const char* Peek(size_t* len); + virtual void Skip(size_t n); + private: + const char* ptr_; + size_t left_; +}; + +// A Sink implementation that writes to a flat array without any bound checks. +class UncheckedByteArraySink : public Sink { + public: + explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } + virtual ~UncheckedByteArraySink(); + virtual void Append(const char* data, size_t n); + virtual char* GetAppendBuffer(size_t len, char* scratch); + + // Return the current output pointer so that a caller can see how + // many bytes were produced. + // Note: this is not a Sink method. + char* CurrentDestination() const { return dest_; } + private: + char* dest_; +}; + + +} + +#endif // UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_ diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.cc b/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.cc new file mode 100644 index 0000000..6ed3343 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.cc @@ -0,0 +1,42 @@ +// Copyright 2011 Google Inc. All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include + +#include "snappy-stubs-internal.h" + +namespace snappy { + +void Varint::Append32(string* s, uint32 value) { + char buf[Varint::kMax32]; + const char* p = Varint::Encode32(buf, value); + s->append(buf, p - buf); +} + +} // namespace snappy diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.h b/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.h new file mode 100644 index 0000000..12393b6 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-internal.h @@ -0,0 +1,491 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various stubs for the open-source version of Snappy.
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#include "snappy-stubs-public.h"
+
+#if defined(__x86_64__)
+
+// Enable 64-bit optimized versions of some routines.
+#define ARCH_K8 1
+
+#endif
+
+// Needed by OS X, among others.
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// Pull in std::min, std::ostream, and the like. This is safe because this
+// header file is never used from any public header files.
+using namespace std;
+
+// The size of an array, if known at compile-time.
+// Will give unexpected results if used on a pointer.
+// We undefine it first, since some compilers already have a definition.
+#ifdef ARRAYSIZE
+#undef ARRAYSIZE
+#endif
+#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
+// Static prediction hints.
+#ifdef HAVE_BUILTIN_EXPECT
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define PREDICT_FALSE(x) x
+#define PREDICT_TRUE(x) x
+#endif
+
+// This is only used for recomputing the tag byte table used during
+// decompression; for simplicity we just remove it from the open-source
+// version (anyone who wants to regenerate it can just do the call
+// themselves within main()).
+#define DEFINE_bool(flag_name, default_value, description) \
+  bool FLAGS_ ## flag_name = default_value
+#define DECLARE_bool(flag_name) \
+  extern bool FLAGS_ ## flag_name
+
+namespace snappy {
+
+static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
+static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+
+// Potentially unaligned loads and stores.
+
+// x86 and PowerPC can simply do these loads and stores native.
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
+
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode). There's no simple #define that says just
+// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
+// sub-architectures.
+//
+// This is a mess, but there's not much we can do about it.
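+//
+// [Editor's note] On such older ARM cores the portable fallback further
+// below -- a memcpy into a local temporary, e.g.
+//
+//   uint64 t;
+//   memcpy(&t, p, sizeof t);
+//   return t;
+//
+// -- is the safe way to express an unaligned load; modern compilers lower
+// it to the best available instruction sequence.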
+
+#elif defined(__arm__) && \
+      !defined(__ARM_ARCH_4__) && \
+      !defined(__ARM_ARCH_4T__) && \
+      !defined(__ARM_ARCH_5__) && \
+      !defined(__ARM_ARCH_5T__) && \
+      !defined(__ARM_ARCH_5TE__) && \
+      !defined(__ARM_ARCH_5TEJ__) && \
+      !defined(__ARM_ARCH_6__) && \
+      !defined(__ARM_ARCH_6J__) && \
+      !defined(__ARM_ARCH_6K__) && \
+      !defined(__ARM_ARCH_6Z__) && \
+      !defined(__ARM_ARCH_6ZK__) && \
+      !defined(__ARM_ARCH_6T2__)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+
+// TODO(user): NEON supports unaligned 64-bit loads and stores.
+// See if that would be more efficient on platforms supporting it,
+// at least for copies.
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+  uint64 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+#else
+
+// These functions are provided for architectures that don't support
+// unaligned loads and stores.
+
+inline uint16 UNALIGNED_LOAD16(const void *p) {
+  uint16 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline uint32 UNALIGNED_LOAD32(const void *p) {
+  uint32 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+  uint64 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void UNALIGNED_STORE16(void *p, uint16 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE32(void *p, uint32 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+#endif
+
+// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
+// on some platforms, in particular ARM.
+inline void UnalignedCopy64(const void *src, void *dst) {
+  if (sizeof(void *) == 8) {
+    UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
+  } else {
+    const char *src_char = reinterpret_cast<const char *>(src);
+    char *dst_char = reinterpret_cast<char *>(dst);
+
+    UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
+    UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
+  }
+}
+
+// The following guarantees declaration of the byte swap functions.
+#ifdef WORDS_BIGENDIAN
+
+#ifdef HAVE_SYS_BYTEORDER_H
+#include <sys/byteorder.h>
+#endif
+
+#ifdef HAVE_SYS_ENDIAN_H
+#include <sys/endian.h>
+#endif
+
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define bswap_16(x) _byteswap_ushort(x)
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+#define bswap_16(x) OSSwapInt16(x)
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#elif defined(HAVE_BYTESWAP_H)
+#include <byteswap.h>
+
+#elif defined(bswap32)
+// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
+#define bswap_16(x) bswap16(x)
+#define bswap_32(x) bswap32(x)
+#define bswap_64(x) bswap64(x)
+
+#elif defined(BSWAP_64)
+// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
+#define bswap_16(x) BSWAP_16(x) +#define bswap_32(x) BSWAP_32(x) +#define bswap_64(x) BSWAP_64(x) + +#else + +inline uint16 bswap_16(uint16 x) { + return (x << 8) | (x >> 8); +} + +inline uint32 bswap_32(uint32 x) { + x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); + return (x >> 16) | (x << 16); +} + +inline uint64 bswap_64(uint64 x) { + x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); + x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); + return (x >> 32) | (x << 32); +} + +#endif + +#endif // WORDS_BIGENDIAN + +// Convert to little-endian storage, opposite of network format. +// Convert x from host to little endian: x = LittleEndian.FromHost(x); +// convert x from little endian to host: x = LittleEndian.ToHost(x); +// +// Store values into unaligned memory converting to little endian order: +// LittleEndian.Store16(p, x); +// +// Load unaligned values stored in little endian converting to host order: +// x = LittleEndian.Load16(p); +class LittleEndian { + public: + // Conversion functions. +#ifdef WORDS_BIGENDIAN + + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } + + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } + + static bool IsLittleEndian() { return false; } + +#else // !defined(WORDS_BIGENDIAN) + + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } + + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } + + static bool IsLittleEndian() { return true; } + +#endif // !defined(WORDS_BIGENDIAN) + + // Functions to do unaligned loads and stores in little-endian order. + static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } + + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } + + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } + + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } +}; + +// Some bit-manipulation functions. +class Bits { + public: + // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. + static int Log2Floor(uint32 n); + + // Return the first set least / most significant bit, 0-indexed. Returns an + // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except + // that it's 0-indexed. + static int FindLSBSetNonZero(uint32 n); + static int FindLSBSetNonZero64(uint64 n); + + private: + DISALLOW_COPY_AND_ASSIGN(Bits); +}; + +#ifdef HAVE_BUILTIN_CTZ + +inline int Bits::Log2Floor(uint32 n) { + return n == 0 ? -1 : 31 ^ __builtin_clz(n); +} + +inline int Bits::FindLSBSetNonZero(uint32 n) { + return __builtin_ctz(n); +} + +inline int Bits::FindLSBSetNonZero64(uint64 n) { + return __builtin_ctzll(n); +} + +#else // Portable versions. + +inline int Bits::Log2Floor(uint32 n) { + if (n == 0) + return -1; + int log = 0; + uint32 value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint32 x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + assert(value == 1); + return log; +} + +inline int Bits::FindLSBSetNonZero(uint32 n) { + int rc = 31; + for (int i = 4, shift = 1 << 4; i >= 0; --i) { + const uint32 x = n << shift; + if (x != 0) { + n = x; + rc -= shift; + } + shift >>= 1; + } + return rc; +} + +// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). 
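+// (Editor's worked example: FindLSBSetNonZero(0x18) == 3, because the
+// lowest set bit of 0b11000 is bit 3; consequently
+// FindLSBSetNonZero64(0x1800000000ULL) == 32 + 3 == 35, since the bottom
+// 32 bits are zero and the top half is scanned instead.)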
+inline int Bits::FindLSBSetNonZero64(uint64 n) { + const uint32 bottombits = static_cast(n); + if (bottombits == 0) { + // Bottom bits are zero, so scan in top bits + return 32 + FindLSBSetNonZero(static_cast(n >> 32)); + } else { + return FindLSBSetNonZero(bottombits); + } +} + +#endif // End portable versions. + +// Variable-length integer encoding. +class Varint { + public: + // Maximum lengths of varint encoding of uint32. + static const int kMax32 = 5; + + // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. + // Never reads a character at or beyond limit. If a valid/terminated varint32 + // was found in the range, stores it in *OUTPUT and returns a pointer just + // past the last byte of the varint32. Else returns NULL. On success, + // "result <= limit". + static const char* Parse32WithLimit(const char* ptr, const char* limit, + uint32* OUTPUT); + + // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". + // EFFECTS Encodes "v" into "ptr" and returns a pointer to the + // byte just past the last encoded byte. + static char* Encode32(char* ptr, uint32 v); + + // EFFECTS Appends the varint representation of "value" to "*s". + static void Append32(string* s, uint32 value); +}; + +inline const char* Varint::Parse32WithLimit(const char* p, + const char* l, + uint32* OUTPUT) { + const unsigned char* ptr = reinterpret_cast(p); + const unsigned char* limit = reinterpret_cast(l); + uint32 b, result; + if (ptr >= limit) return NULL; + b = *(ptr++); result = b & 127; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; + return NULL; // Value is too long to be a varint32 + done: + *OUTPUT = result; + return reinterpret_cast(ptr); +} + +inline char* Varint::Encode32(char* sptr, uint32 v) { + // Operate on characters as unsigneds + unsigned char* ptr = reinterpret_cast(sptr); + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return reinterpret_cast(ptr); +} + +// If you know the internal layout of the std::string in use, you can +// replace this function with one that resizes the string without +// filling the new space with zeros (if applicable) -- +// it will be non-portable but faster. +inline void STLStringResizeUninitialized(string* s, size_t new_size) { + s->resize(new_size); +} + +// Return a mutable char* pointing to a string's internal buffer, +// which may not be null-terminated. Writing through this pointer will +// modify the string. +// +// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the +// next call to a string method that invalidates iterators. +// +// As of 2006-04, there is no standard-blessed way of getting a +// mutable reference to a string's internal buffer. 
However, issue 530 +// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) +// proposes this as the method. It will officially be part of the standard +// for C++0x. This should already work on all current implementations. +inline char* string_as_array(string* str) { + return str->empty() ? NULL : &*str->begin(); +} + +} // namespace snappy + +#endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-public.h b/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-public.h new file mode 100644 index 0000000..4cc8965 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy-stubs-public.h @@ -0,0 +1,111 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: sesse@google.com (Steinar H. Gunderson) +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various type stubs for the open-source version of Snappy. +// +// This file cannot include config.h, as it is included from snappy.h, +// which is a public header. Instead, snappy-stubs-public.h is generated by +// from snappy-stubs-public.h.in at configure time. + +#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ +#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ + +// MSVC 2008 does not include stdint.h. This is a workaround by Mark W. +// Please note that this is only defined in the Blosc sources of Snappy. 
+#if !defined(_MSC_VER) || _MSC_VER >= 1600 +#include +#else +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef __int64 int64_t; +typedef ptrdiff_t intptr_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +typedef size_t uintptr_t; +#endif + +#if 1 +#include +#endif + +#if 0 +#include +#endif + +#define SNAPPY_MAJOR 1 +#define SNAPPY_MINOR 1 +#define SNAPPY_PATCHLEVEL 1 +#define SNAPPY_VERSION \ + ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) + +#include + +namespace snappy { + +#if 1 +typedef int8_t int8; +typedef uint8_t uint8; +typedef int16_t int16; +typedef uint16_t uint16; +typedef int32_t int32; +typedef uint32_t uint32; +typedef int64_t int64; +typedef uint64_t uint64; +#else +typedef signed char int8; +typedef unsigned char uint8; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; +#endif + +typedef std::string string; + +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +#if !0 +// Windows does not have an iovec type, yet the concept is universally useful. +// It is simple to define it ourselves, so we put it inside our own namespace. +struct iovec { + void* iov_base; + size_t iov_len; +}; +#endif + +} // namespace snappy + +#endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy.cc b/c-blosc/internal-complibs/snappy-1.1.1/snappy.cc new file mode 100644 index 0000000..f8d0d23 --- /dev/null +++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy.cc @@ -0,0 +1,1306 @@ +// Copyright 2005 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
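+// [Editor's note] On DISALLOW_COPY_AND_ASSIGN from the stubs header above,
+// used by several classes in these sources: declaring the copy constructor
+// and copy-assignment operator private and leaving them undefined makes
+// accidental copies a compile-time (or link-time) error. A minimal sketch
+// with a hypothetical class name:
+//
+//   class Decoder {
+//    public:
+//     Decoder() { }
+//    private:
+//     DISALLOW_COPY_AND_ASSIGN(Decoder);
+//   };
+//
+//   // Decoder d2(d1);  would now fail to compile outside the class.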
+ +#include "snappy.h" +#include "snappy-internal.h" +#include "snappy-sinksource.h" + +#include + +#include +#include +#include + + +namespace snappy { + +// Any hash function will produce a valid compressed bitstream, but a good +// hash function reduces the number of collisions and thus yields better +// compression for compressible input, and more speed for incompressible +// input. Of course, it doesn't hurt if the hash function is reasonably fast +// either, as it gets called a lot. +static inline uint32 HashBytes(uint32 bytes, int shift) { + uint32 kMul = 0x1e35a7bd; + return (bytes * kMul) >> shift; +} +static inline uint32 Hash(const char* p, int shift) { + return HashBytes(UNALIGNED_LOAD32(p), shift); +} + +size_t MaxCompressedLength(size_t source_len) { + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + return 32 + source_len + source_len/6; +} + +enum { + LITERAL = 0, + COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode + COPY_2_BYTE_OFFSET = 2, + COPY_4_BYTE_OFFSET = 3 +}; +static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset. + +// Copy "len" bytes from "src" to "op", one byte at a time. Used for +// handling COPY operations where the input and output regions may +// overlap. For example, suppose: +// src == "ab" +// op == src + 2 +// len == 20 +// After IncrementalCopy(src, op, len), the result will have +// eleven copies of "ab" +// ababababababababababab +// Note that this does not match the semantics of either memcpy() +// or memmove(). +static inline void IncrementalCopy(const char* src, char* op, ssize_t len) { + assert(len > 0); + do { + *op++ = *src++; + } while (--len > 0); +} + +// Equivalent to IncrementalCopy except that it can write up to ten extra +// bytes after the end of the copy, and that it is faster. +// +// The main part of this loop is a simple copy of eight bytes at a time until +// we've copied (at least) the requested amount of bytes. However, if op and +// src are less than eight bytes apart (indicating a repeating pattern of +// length < 8), we first need to expand the pattern in order to get the correct +// results. For instance, if the buffer looks like this, with the eight-byte +// and patterns marked as intervals: +// +// abxxxxxxxxxxxx +// [------] src +// [------] op +// +// a single eight-byte copy from to will repeat the pattern once, +// after which we can move two bytes without moving : +// +// ababxxxxxxxxxx +// [------] src +// [------] op +// +// and repeat the exercise until the two no longer overlap. 
+// +// This allows us to do very well in the special case of one single byte +// repeated many times, without taking a big hit for more general cases. +// +// The worst case of extra writing past the end of the match occurs when +// op - src == 1 and len == 1; the last copy will read from byte positions +// [0..7] and write to [4..11], whereas it was only supposed to write to +// position 1. Thus, ten excess bytes. + +namespace { + +const int kMaxIncrementCopyOverflow = 10; + +inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) { + while (op - src < 8) { + UnalignedCopy64(src, op); + len -= op - src; + op += op - src; + } + while (len > 0) { + UnalignedCopy64(src, op); + src += 8; + op += 8; + len -= 8; + } +} + +} // namespace + +static inline char* EmitLiteral(char* op, + const char* literal, + int len, + bool allow_fast_path) { + int n = len - 1; // Zero-length literals are disallowed + if (n < 60) { + // Fits in tag byte + *op++ = LITERAL | (n << 2); + + // The vast majority of copies are below 16 bytes, for which a + // call to memcpy is overkill. This fast path can sometimes + // copy up to 15 bytes too much, but that is okay in the + // main loop, since we have a bit to go on for both sides: + // + // - The input will always have kInputMarginBytes = 15 extra + // available bytes, as long as we're in the main loop, and + // if not, allow_fast_path = false. + // - The output will always have 32 spare bytes (see + // MaxCompressedLength). + if (allow_fast_path && len <= 16) { + UnalignedCopy64(literal, op); + UnalignedCopy64(literal + 8, op + 8); + return op + len; + } + } else { + // Encode in upcoming bytes + char* base = op; + int count = 0; + op++; + while (n > 0) { + *op++ = n & 0xff; + n >>= 8; + count++; + } + assert(count >= 1); + assert(count <= 4); + *base = LITERAL | ((59+count) << 2); + } + memcpy(op, literal, len); + return op + len; +} + +static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) { + assert(len <= 64); + assert(len >= 4); + assert(offset < 65536); + + if ((len < 12) && (offset < 2048)) { + size_t len_minus_4 = len - 4; + assert(len_minus_4 < 8); // Must fit in 3 bits + *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5); + *op++ = offset & 0xff; + } else { + *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2); + LittleEndian::Store16(op, offset); + op += 2; + } + return op; +} + +static inline char* EmitCopy(char* op, size_t offset, int len) { + // Emit 64 byte copies but make sure to keep at least four bytes reserved + while (len >= 68) { + op = EmitCopyLessThan64(op, offset, 64); + len -= 64; + } + + // Emit an extra 60 byte copy if have too much data to fit in one copy + if (len > 64) { + op = EmitCopyLessThan64(op, offset, 60); + len -= 60; + } + + // Emit remainder + op = EmitCopyLessThan64(op, offset, len); + return op; +} + + +bool GetUncompressedLength(const char* start, size_t n, size_t* result) { + uint32 v = 0; + const char* limit = start + n; + if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { + *result = v; + return true; + } else { + return false; + } +} + +namespace internal { +uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) { + // Use smaller hash table when input.size() is smaller, since we + // fill the table, incurring O(hash table size) overhead for + // compression, and if the input is short, we won't need that + // many hash table entries anyway. 
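+  // (Editor's worked example, assuming the usual kMaxHashTableSize of
+  // 1 << 14: for input_size == 10000 the loop below doubles htsize
+  // 256 -> 512 -> ... -> 16384, stopping once htsize >= input_size or the
+  // cap is reached; 16384 entries no longer fit in small_table_, which
+  // holds 1 << 10 entries, so large_table_ is allocated.)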
+ assert(kMaxHashTableSize >= 256); + size_t htsize = 256; + while (htsize < kMaxHashTableSize && htsize < input_size) { + htsize <<= 1; + } + + uint16* table; + if (htsize <= ARRAYSIZE(small_table_)) { + table = small_table_; + } else { + if (large_table_ == NULL) { + large_table_ = new uint16[kMaxHashTableSize]; + } + table = large_table_; + } + + *table_size = htsize; + memset(table, 0, htsize * sizeof(*table)); + return table; +} +} // end namespace internal + +// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will +// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have +// empirically found that overlapping loads such as +// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) +// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. +// +// We have different versions for 64- and 32-bit; ideally we would avoid the +// two functions and just inline the UNALIGNED_LOAD64 call into +// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever +// enough to avoid loading the value multiple times then. For 64-bit, the load +// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is +// done at GetUint32AtOffset() time. + +#ifdef ARCH_K8 + +typedef uint64 EightBytesReference; + +static inline EightBytesReference GetEightBytesAt(const char* ptr) { + return UNALIGNED_LOAD64(ptr); +} + +static inline uint32 GetUint32AtOffset(uint64 v, int offset) { + assert(offset >= 0); + assert(offset <= 4); + return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset); +} + +#else + +typedef const char* EightBytesReference; + +static inline EightBytesReference GetEightBytesAt(const char* ptr) { + return ptr; +} + +static inline uint32 GetUint32AtOffset(const char* v, int offset) { + assert(offset >= 0); + assert(offset <= 4); + return UNALIGNED_LOAD32(v + offset); +} + +#endif + +// Flat array compression that does not emit the "uncompressed length" +// prefix. Compresses "input" string to the "*op" buffer. +// +// REQUIRES: "input" is at most "kBlockSize" bytes long. +// REQUIRES: "op" points to an array of memory that is at least +// "MaxCompressedLength(input.size())" in size. +// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". +namespace internal { +char* CompressFragment(const char* input, + size_t input_size, + char* op, + uint16* table, + const int table_size) { + // "ip" is the input pointer, and "op" is the output pointer. + const char* ip = input; + assert(input_size <= kBlockSize); + assert((table_size & (table_size - 1)) == 0); // table must be power of two + const int shift = 32 - Bits::Log2Floor(table_size); + assert(static_cast(kuint32max >> shift) == table_size - 1); + const char* ip_end = input + input_size; + const char* base_ip = ip; + // Bytes in [next_emit, ip) will be emitted as literal bytes. Or + // [next_emit, ip_end) after the main loop. + const char* next_emit = ip; + + const size_t kInputMarginBytes = 15; + if (PREDICT_TRUE(input_size >= kInputMarginBytes)) { + const char* ip_limit = input + input_size - kInputMarginBytes; + + for (uint32 next_hash = Hash(++ip, shift); ; ) { + assert(next_emit < ip); + // The body of this loop calls EmitLiteral once and then EmitCopy one or + // more times. 
(The exception is that when we're close to exhausting + // the input we goto emit_remainder.) + // + // In the first iteration of this loop we're just starting, so + // there's nothing to copy, so calling EmitLiteral once is + // necessary. And we only start a new iteration when the + // current iteration has determined that a call to EmitLiteral will + // precede the next call to EmitCopy (if any). + // + // Step 1: Scan forward in the input looking for a 4-byte-long match. + // If we get close to exhausting the input then goto emit_remainder. + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned, look at every third byte, etc.. When a match is found, + // immediately go back to looking at every byte. This is a small loss + // (~5% performance, ~0.1% density) for compressible data due to more + // bookkeeping, but for non-compressible data (such as JPEG) it's a huge + // win since the compressor quickly "realizes" the data is incompressible + // and doesn't bother looking for matches everywhere. + // + // The "skip" variable keeps track of how many bytes there are since the + // last match; dividing it by 32 (ie. right-shifting by five) gives the + // number of bytes to move ahead for each iteration. + uint32 skip = 32; + + const char* next_ip = ip; + const char* candidate; + do { + ip = next_ip; + uint32 hash = next_hash; + assert(hash == Hash(ip, shift)); + uint32 bytes_between_hash_lookups = skip++ >> 5; + next_ip = ip + bytes_between_hash_lookups; + if (PREDICT_FALSE(next_ip > ip_limit)) { + goto emit_remainder; + } + next_hash = Hash(next_ip, shift); + candidate = base_ip + table[hash]; + assert(candidate >= base_ip); + assert(candidate < ip); + + table[hash] = ip - base_ip; + } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) != + UNALIGNED_LOAD32(candidate))); + + // Step 2: A 4-byte match has been found. We'll later see if more + // than 4 bytes match. But, prior to the match, input + // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." + assert(next_emit + 16 <= ip_end); + op = EmitLiteral(op, next_emit, ip - next_emit, true); + + // Step 3: Call EmitCopy, and then see if another EmitCopy could + // be our next move. Repeat until we find no match for the + // input immediately after what was consumed by the last EmitCopy call. + // + // If we exit this loop normally then we need to call EmitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can exit + // this loop via goto if we get close to exhausting the input. + EightBytesReference input_bytes; + uint32 candidate_bytes = 0; + + do { + // We have a 4-byte match at ip, and no need to emit any + // "literal bytes" prior to ip. + const char* base = ip; + int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); + ip += matched; + size_t offset = base - candidate; + assert(0 == memcmp(base, candidate, matched)); + op = EmitCopy(op, offset, matched); + // We could immediately start working at ip now, but to improve + // compression we first update table[Hash(ip - 1, ...)]. 
+ const char* insert_tail = ip - 1; + next_emit = ip; + if (PREDICT_FALSE(ip >= ip_limit)) { + goto emit_remainder; + } + input_bytes = GetEightBytesAt(insert_tail); + uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); + table[prev_hash] = ip - base_ip - 1; + uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); + candidate = base_ip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - base_ip; + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); + ++ip; + } + } + + emit_remainder: + // Emit the remaining bytes as a literal + if (next_emit < ip_end) { + op = EmitLiteral(op, next_emit, ip_end - next_emit, false); + } + + return op; +} +} // end namespace internal + +// Signature of output types needed by decompression code. +// The decompression code is templatized on a type that obeys this +// signature so that we do not pay virtual function call overhead in +// the middle of a tight decompression loop. +// +// class DecompressionWriter { +// public: +// // Called before decompression +// void SetExpectedLength(size_t length); +// +// // Called after decompression +// bool CheckLength() const; +// +// // Called repeatedly during decompression +// bool Append(const char* ip, size_t length); +// bool AppendFromSelf(uint32 offset, size_t length); +// +// // The rules for how TryFastAppend differs from Append are somewhat +// // convoluted: +// // +// // - TryFastAppend is allowed to decline (return false) at any +// // time, for any reason -- just "return false" would be +// // a perfectly legal implementation of TryFastAppend. +// // The intention is for TryFastAppend to allow a fast path +// // in the common case of a small append. +// // - TryFastAppend is allowed to read up to bytes +// // from the input buffer, whereas Append is allowed to read +// // . However, if it returns true, it must leave +// // at least five (kMaximumTagLength) bytes in the input buffer +// // afterwards, so that there is always enough space to read the +// // next tag without checking for a refill. +// // - TryFastAppend must always return decline (return false) +// // if is 61 or more, as in this case the literal length is not +// // decoded fully. In practice, this should not be a big problem, +// // as it is unlikely that one would implement a fast path accepting +// // this much data. +// // +// bool TryFastAppend(const char* ip, size_t available, size_t length); +// }; + +// ----------------------------------------------------------------------- +// Lookup table for decompression code. Generated by ComputeTable() below. 
+// ----------------------------------------------------------------------- + +// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits +static const uint32 wordmask[] = { + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +}; + +// Data stored per entry in lookup table: +// Range Bits-used Description +// ------------------------------------ +// 1..64 0..7 Literal/copy length encoded in opcode byte +// 0..7 8..10 Copy offset encoded in opcode byte / 256 +// 0..4 11..13 Extra bytes after opcode +// +// We use eight bits for the length even though 7 would have sufficed +// because of efficiency reasons: +// (1) Extracting a byte is faster than a bit-field +// (2) It properly aligns copy offset so we do not need a <<8 +static const uint16 char_table[256] = { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 +}; + +// In debug mode, allow optional computation of the table at startup. +// Also, check that the decompression table is correct. 
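+//
+// (Editor's worked example of one entry: char_table[0x01] == 0x0804, which
+// unpacks per the table above as extra bytes == (0x0804 >> 11) == 1,
+// length == (0x0804 & 0xff) == 4, and copy-offset high bits
+// == (0x0804 & 0x700) >> 8 == 0: a COPY_1_BYTE_OFFSET tag for a 4-byte
+// copy whose offset/256 is 0, followed by one trailing offset byte.)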
+#ifndef NDEBUG +DEFINE_bool(snappy_dump_decompression_table, false, + "If true, we print the decompression table at startup."); + +static uint16 MakeEntry(unsigned int extra, + unsigned int len, + unsigned int copy_offset) { + // Check that all of the fields fit within the allocated space + assert(extra == (extra & 0x7)); // At most 3 bits + assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits + assert(len == (len & 0x7f)); // At most 7 bits + return len | (copy_offset << 8) | (extra << 11); +} + +static void ComputeTable() { + uint16 dst[256]; + + // Place invalid entries in all places to detect missing initialization + int assigned = 0; + for (int i = 0; i < 256; i++) { + dst[i] = 0xffff; + } + + // Small LITERAL entries. We store (len-1) in the top 6 bits. + for (unsigned int len = 1; len <= 60; len++) { + dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0); + assigned++; + } + + // Large LITERAL entries. We use 60..63 in the high 6 bits to + // encode the number of bytes of length info that follow the opcode. + for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) { + // We set the length field in the lookup table to 1 because extra + // bytes encode len-1. + dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0); + assigned++; + } + + // COPY_1_BYTE_OFFSET. + // + // The tag byte in the compressed data stores len-4 in 3 bits, and + // offset/256 in 5 bits. offset%256 is stored in the next byte. + // + // This format is used for length in range [4..11] and offset in + // range [0..2047] + for (unsigned int len = 4; len < 12; len++) { + for (unsigned int offset = 0; offset < 2048; offset += 256) { + dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] = + MakeEntry(1, len, offset>>8); + assigned++; + } + } + + // COPY_2_BYTE_OFFSET. + // Tag contains len-1 in top 6 bits, and offset in next two bytes. + for (unsigned int len = 1; len <= 64; len++) { + dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0); + assigned++; + } + + // COPY_4_BYTE_OFFSET. + // Tag contents len-1 in top 6 bits, and offset in next four bytes. + for (unsigned int len = 1; len <= 64; len++) { + dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0); + assigned++; + } + + // Check that each entry was initialized exactly once. + if (assigned != 256) { + fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned); + abort(); + } + for (int i = 0; i < 256; i++) { + if (dst[i] == 0xffff) { + fprintf(stderr, "ComputeTable: did not assign byte %d\n", i); + abort(); + } + } + + if (FLAGS_snappy_dump_decompression_table) { + printf("static const uint16 char_table[256] = {\n "); + for (int i = 0; i < 256; i++) { + printf("0x%04x%s", + dst[i], + ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", "))); + } + printf("};\n"); + } + + // Check that computed table matched recorded table + for (int i = 0; i < 256; i++) { + if (dst[i] != char_table[i]) { + fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n", + i, static_cast(dst[i]), static_cast(char_table[i])); + abort(); + } + } +} +#endif /* !NDEBUG */ + +// Helper class for decompression +class SnappyDecompressor { + private: + Source* reader_; // Underlying source of bytes to decompress + const char* ip_; // Points to next buffered byte + const char* ip_limit_; // Points just past buffered bytes + uint32 peeked_; // Bytes peeked from reader (need to skip) + bool eof_; // Hit end of input without an error? + char scratch_[kMaximumTagLength]; // See RefillTag(). 
+ + // Ensure that all of the tag metadata for the next tag is available + // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even + // if (ip_limit_ - ip_ < 5). + // + // Returns true on success, false on error or end of input. + bool RefillTag(); + + public: + explicit SnappyDecompressor(Source* reader) + : reader_(reader), + ip_(NULL), + ip_limit_(NULL), + peeked_(0), + eof_(false) { + } + + ~SnappyDecompressor() { + // Advance past any bytes we peeked at from the reader + reader_->Skip(peeked_); + } + + // Returns true iff we have hit the end of the input without an error. + bool eof() const { + return eof_; + } + + // Read the uncompressed length stored at the start of the compressed data. + // On succcess, stores the length in *result and returns true. + // On failure, returns false. + bool ReadUncompressedLength(uint32* result) { + assert(ip_ == NULL); // Must not have read anything yet + // Length is encoded in 1..5 bytes + *result = 0; + uint32 shift = 0; + while (true) { + if (shift >= 32) return false; + size_t n; + const char* ip = reader_->Peek(&n); + if (n == 0) return false; + const unsigned char c = *(reinterpret_cast(ip)); + reader_->Skip(1); + *result |= static_cast(c & 0x7f) << shift; + if (c < 128) { + break; + } + shift += 7; + } + return true; + } + + // Process the next item found in the input. + // Returns true if successful, false on error or end of input. + template + void DecompressAllTags(Writer* writer) { + const char* ip = ip_; + + // We could have put this refill fragment only at the beginning of the loop. + // However, duplicating it at the end of each branch gives the compiler more + // scope to optimize the expression based on the local + // context, which overall increases speed. + #define MAYBE_REFILL() \ + if (ip_limit_ - ip < kMaximumTagLength) { \ + ip_ = ip; \ + if (!RefillTag()) return; \ + ip = ip_; \ + } + + MAYBE_REFILL(); + for ( ;; ) { + const unsigned char c = *(reinterpret_cast(ip++)); + + if ((c & 0x3) == LITERAL) { + size_t literal_length = (c >> 2) + 1u; + if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { + assert(literal_length < 61); + ip += literal_length; + // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend() + // will not return true unless there's already at least five spare + // bytes in addition to the literal. + continue; + } + if (PREDICT_FALSE(literal_length >= 61)) { + // Long literal. + const size_t literal_length_length = literal_length - 60; + literal_length = + (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1; + ip += literal_length_length; + } + + size_t avail = ip_limit_ - ip; + while (avail < literal_length) { + if (!writer->Append(ip, avail)) return; + literal_length -= avail; + reader_->Skip(peeked_); + size_t n; + ip = reader_->Peek(&n); + avail = n; + peeked_ = avail; + if (avail == 0) return; // Premature end of input + ip_limit_ = ip + avail; + } + if (!writer->Append(ip, literal_length)) { + return; + } + ip += literal_length; + MAYBE_REFILL(); + } else { + const uint32 entry = char_table[c]; + const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11]; + const uint32 length = entry & 0xff; + ip += entry >> 11; + + // copy_offset/256 is encoded in bits 8..10. By just fetching + // those bits, we get copy_offset (since the bit-field starts at + // bit 8). 
+        const uint32 copy_offset = entry & 0x700;
+        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
+          return;
+        }
+        MAYBE_REFILL();
+      }
+    }
+
+#undef MAYBE_REFILL
+  }
+};
+
+bool SnappyDecompressor::RefillTag() {
+  const char* ip = ip_;
+  if (ip == ip_limit_) {
+    // Fetch a new fragment from the reader
+    reader_->Skip(peeked_);  // All peeked bytes are used up
+    size_t n;
+    ip = reader_->Peek(&n);
+    peeked_ = n;
+    if (n == 0) {
+      eof_ = true;
+      return false;
+    }
+    ip_limit_ = ip + n;
+  }
+
+  // Read the tag character
+  assert(ip < ip_limit_);
+  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+  const uint32 entry = char_table[c];
+  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
+  assert(needed <= sizeof(scratch_));
+
+  // Read more bytes from reader if needed
+  uint32 nbuf = ip_limit_ - ip;
+  if (nbuf < needed) {
+    // Stitch together bytes from ip and reader to form the word
+    // contents.  We store the needed bytes in "scratch_".  They
+    // will be consumed immediately by the caller since we do not
+    // read more than we need.
+    memmove(scratch_, ip, nbuf);
+    reader_->Skip(peeked_);  // All peeked bytes are used up
+    peeked_ = 0;
+    while (nbuf < needed) {
+      size_t length;
+      const char* src = reader_->Peek(&length);
+      if (length == 0) return false;
+      uint32 to_add = min<size_t>(needed - nbuf, length);
+      memcpy(scratch_ + nbuf, src, to_add);
+      nbuf += to_add;
+      reader_->Skip(to_add);
+    }
+    assert(nbuf == needed);
+    ip_ = scratch_;
+    ip_limit_ = scratch_ + needed;
+  } else if (nbuf < kMaximumTagLength) {
+    // Have enough bytes, but move into scratch_ so that we do not
+    // read past end of input
+    memmove(scratch_, ip, nbuf);
+    reader_->Skip(peeked_);  // All peeked bytes are used up
+    peeked_ = 0;
+    ip_ = scratch_;
+    ip_limit_ = scratch_ + nbuf;
+  } else {
+    // Pass pointer to buffer returned by reader_.
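+    // (In this branch nbuf >= kMaximumTagLength, so a complete tag is
+    // already available in place and no copy into scratch_ is needed.)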
+    ip_ = ip;
+  }
+  return true;
+}
+
+template <typename Writer>
+static bool InternalUncompress(Source* r, Writer* writer) {
+  // Read the uncompressed length from the front of the compressed input
+  SnappyDecompressor decompressor(r);
+  uint32 uncompressed_len = 0;
+  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
+  return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
+}
+
+template <typename Writer>
+static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
+                                      Writer* writer,
+                                      uint32 uncompressed_len) {
+  writer->SetExpectedLength(uncompressed_len);
+
+  // Process the entire input
+  decompressor->DecompressAllTags(writer);
+  return (decompressor->eof() && writer->CheckLength());
+}
+
+bool GetUncompressedLength(Source* source, uint32* result) {
+  SnappyDecompressor decompressor(source);
+  return decompressor.ReadUncompressedLength(result);
+}
+
+size_t Compress(Source* reader, Sink* writer) {
+  size_t written = 0;
+  size_t N = reader->Available();
+  char ulength[Varint::kMax32];
+  char* p = Varint::Encode32(ulength, N);
+  writer->Append(ulength, p-ulength);
+  written += (p - ulength);
+
+  internal::WorkingMemory wmem;
+  char* scratch = NULL;
+  char* scratch_output = NULL;
+
+  while (N > 0) {
+    // Get next block to compress (without copying if possible)
+    size_t fragment_size;
+    const char* fragment = reader->Peek(&fragment_size);
+    assert(fragment_size != 0);  // premature end of input
+    const size_t num_to_read = min(N, kBlockSize);
+    size_t bytes_read = fragment_size;
+
+    size_t pending_advance = 0;
+    if (bytes_read >= num_to_read) {
+      // Buffer returned by reader is large enough
+      pending_advance = num_to_read;
+      fragment_size = num_to_read;
+    } else {
+      // Read into scratch buffer
+      if (scratch == NULL) {
+        // If this is the last iteration, we want to allocate N bytes
+        // of space, otherwise the max possible kBlockSize space.
+        // num_to_read contains exactly the correct value
+        scratch = new char[num_to_read];
+      }
+      memcpy(scratch, fragment, bytes_read);
+      reader->Skip(bytes_read);
+
+      while (bytes_read < num_to_read) {
+        fragment = reader->Peek(&fragment_size);
+        size_t n = min(fragment_size, num_to_read - bytes_read);
+        memcpy(scratch + bytes_read, fragment, n);
+        bytes_read += n;
+        reader->Skip(n);
+      }
+      assert(bytes_read == num_to_read);
+      fragment = scratch;
+      fragment_size = num_to_read;
+    }
+    assert(fragment_size == num_to_read);
+
+    // Get encoding table for compression
+    int table_size;
+    uint16* table = wmem.GetHashTable(num_to_read, &table_size);
+
+    // Compress input_fragment and append to dest
+    const int max_output = MaxCompressedLength(num_to_read);
+
+    // Need a scratch buffer for the output, in case the byte sink doesn't
+    // have room for us directly.
+    if (scratch_output == NULL) {
+      scratch_output = new char[max_output];
+    } else {
+      // Since we encode kBlockSize regions followed by a region
+      // which is <= kBlockSize in length, a previously allocated
+      // scratch_output[] region is big enough for this iteration.
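+      // (Concretely, assuming snappy's usual bound
+      // MaxCompressedLength(n) == 32 + n + n/6: a full 64 KiB block
+      // needs at most 32 + 65536 + 10922 == 76490 bytes here.)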
+    }
+    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
+    char* end = internal::CompressFragment(fragment, fragment_size,
+                                           dest, table, table_size);
+    writer->Append(dest, end - dest);
+    written += (end - dest);
+
+    N -= num_to_read;
+    reader->Skip(pending_advance);
+  }
+
+  delete[] scratch;
+  delete[] scratch_output;
+
+  return written;
+}
+
+// -----------------------------------------------------------------------
+// IOVec interfaces
+// -----------------------------------------------------------------------
+
+// A type that writes to an iovec.
+// Note that this is not a "ByteSink", but a type that matches the
+// Writer template argument to SnappyDecompressor::DecompressAllTags().
+class SnappyIOVecWriter {
+ private:
+  const struct iovec* output_iov_;
+  const size_t output_iov_count_;
+
+  // We are currently writing into output_iov_[curr_iov_index_].
+  int curr_iov_index_;
+
+  // Bytes written to output_iov_[curr_iov_index_] so far.
+  size_t curr_iov_written_;
+
+  // Total bytes decompressed into output_iov_ so far.
+  size_t total_written_;
+
+  // Maximum number of bytes that will be decompressed into output_iov_.
+  size_t output_limit_;
+
+  inline char* GetIOVecPointer(int index, size_t offset) {
+    return reinterpret_cast<char*>(output_iov_[index].iov_base) +
+        offset;
+  }
+
+ public:
+  // Does not take ownership of iov.  iov must be valid during the
+  // entire lifetime of the SnappyIOVecWriter.
+  inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
+      : output_iov_(iov),
+        output_iov_count_(iov_count),
+        curr_iov_index_(0),
+        curr_iov_written_(0),
+        total_written_(0),
+        output_limit_(-1) {
+  }
+
+  inline void SetExpectedLength(size_t len) {
+    output_limit_ = len;
+  }
+
+  inline bool CheckLength() const {
+    return total_written_ == output_limit_;
+  }
+
+  inline bool Append(const char* ip, size_t len) {
+    if (total_written_ + len > output_limit_) {
+      return false;
+    }
+
+    while (len > 0) {
+      assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
+      if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
+        // This iovec is full.  Go to the next one.
+        if (curr_iov_index_ + 1 >= output_iov_count_) {
+          return false;
+        }
+        curr_iov_written_ = 0;
+        ++curr_iov_index_;
+      }
+
+      const size_t to_write = std::min(
+          len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
+      memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
+             ip,
+             to_write);
+      curr_iov_written_ += to_write;
+      total_written_ += to_write;
+      ip += to_write;
+      len -= to_write;
+    }
+
+    return true;
+  }
+
+  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
+    const size_t space_left = output_limit_ - total_written_;
+    if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
+        output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
+      // Fast path, used for the majority (about 95%) of invocations.
+      char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
+      UnalignedCopy64(ip, ptr);
+      UnalignedCopy64(ip + 8, ptr + 8);
+      curr_iov_written_ += len;
+      total_written_ += len;
+      return true;
+    }
+
+    return false;
+  }
+
+  inline bool AppendFromSelf(size_t offset, size_t len) {
+    if (offset > total_written_ || offset == 0) {
+      return false;
+    }
+    const size_t space_left = output_limit_ - total_written_;
+    if (len > space_left) {
+      return false;
+    }
+
+    // Locate the iovec from which we need to start the copy.
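+    // (The loop below walks backwards from the current write position,
+    // consuming "offset" iovec by iovec, until from_iov_index and
+    // from_iov_offset name the source byte exactly "offset" bytes back.)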
+ int from_iov_index = curr_iov_index_; + size_t from_iov_offset = curr_iov_written_; + while (offset > 0) { + if (from_iov_offset >= offset) { + from_iov_offset -= offset; + break; + } + + offset -= from_iov_offset; + --from_iov_index; + assert(from_iov_index >= 0); + from_iov_offset = output_iov_[from_iov_index].iov_len; + } + + // Copy bytes starting from the iovec pointed to by from_iov_index to + // the current iovec. + while (len > 0) { + assert(from_iov_index <= curr_iov_index_); + if (from_iov_index != curr_iov_index_) { + const size_t to_copy = std::min( + output_iov_[from_iov_index].iov_len - from_iov_offset, + len); + Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy); + len -= to_copy; + if (len > 0) { + ++from_iov_index; + from_iov_offset = 0; + } + } else { + assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len); + size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len - + curr_iov_written_, + len); + if (to_copy == 0) { + // This iovec is full. Go to the next one. + if (curr_iov_index_ + 1 >= output_iov_count_) { + return false; + } + ++curr_iov_index_; + curr_iov_written_ = 0; + continue; + } + if (to_copy > len) { + to_copy = len; + } + IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset), + GetIOVecPointer(curr_iov_index_, curr_iov_written_), + to_copy); + curr_iov_written_ += to_copy; + from_iov_offset += to_copy; + total_written_ += to_copy; + len -= to_copy; + } + } + + return true; + } + +}; + +bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, + const struct iovec* iov, size_t iov_cnt) { + ByteArraySource reader(compressed, compressed_length); + return RawUncompressToIOVec(&reader, iov, iov_cnt); +} + +bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, + size_t iov_cnt) { + SnappyIOVecWriter output(iov, iov_cnt); + return InternalUncompress(compressed, &output); +} + +// ----------------------------------------------------------------------- +// Flat array interfaces +// ----------------------------------------------------------------------- + +// A type that writes to a flat array. +// Note that this is not a "ByteSink", but a type that matches the +// Writer template argument to SnappyDecompressor::DecompressAllTags(). +class SnappyArrayWriter { + private: + char* base_; + char* op_; + char* op_limit_; + + public: + inline explicit SnappyArrayWriter(char* dst) + : base_(dst), + op_(dst) { + } + + inline void SetExpectedLength(size_t len) { + op_limit_ = op_ + len; + } + + inline bool CheckLength() const { + return op_ == op_limit_; + } + + inline bool Append(const char* ip, size_t len) { + char* op = op_; + const size_t space_left = op_limit_ - op; + if (space_left < len) { + return false; + } + memcpy(op, ip, len); + op_ = op + len; + return true; + } + + inline bool TryFastAppend(const char* ip, size_t available, size_t len) { + char* op = op_; + const size_t space_left = op_limit_ - op; + if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { + // Fast path, used for the majority (about 95%) of invocations. + UnalignedCopy64(ip, op); + UnalignedCopy64(ip + 8, op + 8); + op_ = op + len; + return true; + } else { + return false; + } + } + + inline bool AppendFromSelf(size_t offset, size_t len) { + char* op = op_; + const size_t space_left = op_limit_ - op; + + // Check if we try to append from before the start of the buffer. 
+ // Normally this would just be a check for "produced < offset", + // but "produced <= offset - 1u" is equivalent for every case + // except the one where offset==0, where the right side will wrap around + // to a very big number. This is convenient, as offset==0 is another + // invalid case that we also want to catch, so that we do not go + // into an infinite loop. + assert(op >= base_); + size_t produced = op - base_; + if (produced <= offset - 1u) { + return false; + } + if (len <= 16 && offset >= 8 && space_left >= 16) { + // Fast path, used for the majority (70-80%) of dynamic invocations. + UnalignedCopy64(op - offset, op); + UnalignedCopy64(op - offset + 8, op + 8); + } else { + if (space_left >= len + kMaxIncrementCopyOverflow) { + IncrementalCopyFastPath(op - offset, op, len); + } else { + if (space_left < len) { + return false; + } + IncrementalCopy(op - offset, op, len); + } + } + + op_ = op + len; + return true; + } +}; + +bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { + ByteArraySource reader(compressed, n); + return RawUncompress(&reader, uncompressed); +} + +bool RawUncompress(Source* compressed, char* uncompressed) { + SnappyArrayWriter output(uncompressed); + return InternalUncompress(compressed, &output); +} + +bool Uncompress(const char* compressed, size_t n, string* uncompressed) { + size_t ulength; + if (!GetUncompressedLength(compressed, n, &ulength)) { + return false; + } + // On 32-bit builds: max_size() < kuint32max. Check for that instead + // of crashing (e.g., consider externally specified compressed data). + if (ulength > uncompressed->max_size()) { + return false; + } + STLStringResizeUninitialized(uncompressed, ulength); + return RawUncompress(compressed, n, string_as_array(uncompressed)); +} + + +// A Writer that drops everything on the floor and just does validation +class SnappyDecompressionValidator { + private: + size_t expected_; + size_t produced_; + + public: + inline SnappyDecompressionValidator() : produced_(0) { } + inline void SetExpectedLength(size_t len) { + expected_ = len; + } + inline bool CheckLength() const { + return expected_ == produced_; + } + inline bool Append(const char* ip, size_t len) { + produced_ += len; + return produced_ <= expected_; + } + inline bool TryFastAppend(const char* ip, size_t available, size_t length) { + return false; + } + inline bool AppendFromSelf(size_t offset, size_t len) { + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. 
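+    //
+    // Worked example: offset == 0 makes offset - 1u wrap to SIZE_MAX,
+    // so produced_ <= offset - 1u is always true and the invalid zero
+    // offset is rejected; for offset == 5, the test rejects exactly when
+    // produced_ <= 4, i.e. when fewer than 5 bytes have been produced.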
+    if (produced_ <= offset - 1u) return false;
+    produced_ += len;
+    return produced_ <= expected_;
+  }
+};
+
+bool IsValidCompressedBuffer(const char* compressed, size_t n) {
+  ByteArraySource reader(compressed, n);
+  SnappyDecompressionValidator writer;
+  return InternalUncompress(&reader, &writer);
+}
+
+void RawCompress(const char* input,
+                 size_t input_length,
+                 char* compressed,
+                 size_t* compressed_length) {
+  ByteArraySource reader(input, input_length);
+  UncheckedByteArraySink writer(compressed);
+  Compress(&reader, &writer);
+
+  // Compute how many bytes were added
+  *compressed_length = (writer.CurrentDestination() - compressed);
+}
+
+size_t Compress(const char* input, size_t input_length, string* compressed) {
+  // Pre-grow the buffer to the max length of the compressed output
+  compressed->resize(MaxCompressedLength(input_length));
+
+  size_t compressed_length;
+  RawCompress(input, input_length, string_as_array(compressed),
+              &compressed_length);
+  compressed->resize(compressed_length);
+  return compressed_length;
+}
+
+
+}  // end namespace snappy
+
diff --git a/c-blosc/internal-complibs/snappy-1.1.1/snappy.h b/c-blosc/internal-complibs/snappy-1.1.1/snappy.h
new file mode 100644
index 0000000..244cc09
--- /dev/null
+++ b/c-blosc/internal-complibs/snappy-1.1.1/snappy.h
@@ -0,0 +1,192 @@
+// Copyright 2005 and onwards Google Inc.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// A light-weight compression algorithm.  It is designed for speed of
+// compression and decompression, rather than for the utmost in space
+// savings.
+//
+// For getting better compression ratios when you are compressing data
+// with long repeated sequences or compressing data that is similar to
+// other data, while still compressing fast, you might look at first
+// using BMDiff and then compressing the output of BMDiff with
+// Snappy.
+
+#ifndef UTIL_SNAPPY_SNAPPY_H__
+#define UTIL_SNAPPY_SNAPPY_H__
+
+#include <stddef.h>
+#include <string>
+
+#include "snappy-stubs-public.h"
+
+// Windows does not define ssize_t by default.  This is a workaround.
+// Please note that this is only defined in the Blosc sources of Snappy.
+#if defined(_WIN32) && !defined(__MINGW32__)
+#include <BaseTsd.h>
+typedef SSIZE_T ssize_t;
+#endif
+
+
+namespace snappy {
+  class Source;
+  class Sink;
+
+  // ------------------------------------------------------------------------
+  // Generic compression/decompression routines.
+  // ------------------------------------------------------------------------
+
+  // Compress the bytes read from "*source" and append to "*sink". Return the
+  // number of bytes written.
+  size_t Compress(Source* source, Sink* sink);
+
+  // Find the uncompressed length of the given stream, as given by the header.
+  // Note that the true length could deviate from this; the stream could e.g.
+  // be truncated.
+  //
+  // Also note that this leaves "*source" in a state that is unsuitable for
+  // further operations, such as RawUncompress().  You will need to rewind
+  // or recreate the source yourself before attempting any further calls.
+  bool GetUncompressedLength(Source* source, uint32* result);
+
+  // ------------------------------------------------------------------------
+  // Higher-level string based routines (should be sufficient for most users)
+  // ------------------------------------------------------------------------
+
+  // Sets "*output" to the compressed version of "input[0,input_length-1]".
+  // Original contents of *output are lost.
+  //
+  // REQUIRES: "input[]" is not an alias of "*output".
+  size_t Compress(const char* input, size_t input_length, string* output);
+
+  // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
+  // Original contents of "*uncompressed" are lost.
+  //
+  // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
+  //
+  // returns false if the message is corrupted and could not be decompressed
+  bool Uncompress(const char* compressed, size_t compressed_length,
+                  string* uncompressed);
+
+
+  // ------------------------------------------------------------------------
+  // Lower-level character array based routines.  May be useful for
+  // efficiency reasons in certain circumstances.
+  // ------------------------------------------------------------------------
+
+  // REQUIRES: "compressed" must point to an area of memory that is at
+  // least "MaxCompressedLength(input_length)" bytes in length.
+  //
+  // Takes the data stored in "input[0..input_length]" and stores
+  // it in the array pointed to by "compressed".
+  //
+  // "*compressed_length" is set to the length of the compressed output.
+  //
+  // Example:
+  //    char* output = new char[snappy::MaxCompressedLength(input_length)];
+  //    size_t output_length;
+  //    RawCompress(input, input_length, output, &output_length);
+  //    ... Process(output, output_length) ...
+  //    delete [] output;
+  void RawCompress(const char* input,
+                   size_t input_length,
+                   char* compressed,
+                   size_t* compressed_length);
+
+  // Given data in "compressed[0..compressed_length-1]" generated by
+  // calling the Snappy::Compress routine, this routine
+  // stores the uncompressed data to
+  //    uncompressed[0..GetUncompressedLength(compressed)-1]
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompress(const char* compressed, size_t compressed_length,
+                     char* uncompressed);
+
+  // Given data from the byte source 'compressed' generated by calling
+  // the Snappy::Compress routine, this routine stores the uncompressed
+  // data to
+  //    uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompress(Source* compressed, char* uncompressed);
+
+  // Given data in "compressed[0..compressed_length-1]" generated by
+  // calling the Snappy::Compress routine, this routine
+  // stores the uncompressed data to the iovec "iov". The number of physical
+  // buffers in "iov" is given by iov_cnt and their cumulative size
+  // must be at least GetUncompressedLength(compressed). The individual buffers
+  // in "iov" must not overlap with each other.
+  //
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
+                            const struct iovec* iov, size_t iov_cnt);
+
+  // Given data from the byte source 'compressed' generated by calling
+  // the Snappy::Compress routine, this routine stores the uncompressed
+  // data to the iovec "iov". The number of physical
+  // buffers in "iov" is given by iov_cnt and their cumulative size
+  // must be at least GetUncompressedLength(compressed). The individual buffers
+  // in "iov" must not overlap with each other.
+  //
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
+                            size_t iov_cnt);
+
+  // Returns the maximal size of the compressed representation of
+  // input data that is "source_bytes" bytes in length.
+  size_t MaxCompressedLength(size_t source_bytes);
+
+  // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
+  // Returns true and stores the length of the uncompressed data in
+  // *result normally.  Returns false on parsing error.
+  // This operation takes O(1) time.
+  bool GetUncompressedLength(const char* compressed, size_t compressed_length,
+                             size_t* result);
+
+  // Returns true iff the contents of "compressed[]" can be uncompressed
+  // successfully.  Does not return the uncompressed data.  Takes
+  // time proportional to compressed_length, but is usually at least
+  // a factor of four faster than actual decompression.
+  bool IsValidCompressedBuffer(const char* compressed,
+                               size_t compressed_length);
+
+  // The size of a compression block. Note that many parts of the compression
+  // code assume that kBlockSize <= 65536; in particular, the hash table
+  // can only store 16-bit offsets, and EmitCopy() also assumes the offset
+  // is 65535 bytes or less.  Note also that if you change this, it will
+  // affect the framing format (see framing_format.txt).
+  //
+  // Note that there might be older data around that is compressed with larger
+  // block sizes, so the decompression code should not rely on the
+  // non-existence of long backreferences.
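+  //
+  // (Concretely: kBlockLog == 16 below gives kBlockSize == 65536 bytes,
+  // and kMaxHashTableBits == 14 caps the hash table at 16384 uint16
+  // entries, i.e. 32 KiB.)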
+ static const int kBlockLog = 16; + static const size_t kBlockSize = 1 << kBlockLog; + + static const int kMaxHashTableBits = 14; + static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; +} // end namespace snappy + + +#endif // UTIL_SNAPPY_SNAPPY_H__ diff --git a/c-blosc/internal-complibs/zlib-1.2.8/adler32.c b/c-blosc/internal-complibs/zlib-1.2.8/adler32.c new file mode 100644 index 0000000..a868f07 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/adler32.c @@ -0,0 +1,179 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +#define local static + +local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); + +#define BASE 65521 /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ +#ifdef NO_DIVIDE +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ + do { \ + CHOP(a); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD(a) \ + do { \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while 
(len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +local uLong adler32_combine_(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + + /* the derivation of this formula is left as an exercise for the reader */ + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/compress.c b/c-blosc/internal-complibs/zlib-1.2.8/compress.c new file mode 100644 index 0000000..6e97626 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/compress.c @@ -0,0 +1,80 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = (uInt)sourceLen; +#ifdef MAXSEG_64K + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; +#endif + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? 
Z_BUF_ERROR : err;
+    }
+    *destLen = stream.total_out;
+
+    err = deflateEnd(&stream);
+    return err;
+}
+
+/* ===========================================================================
+ */
+int ZEXPORT compress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
+}
+
+/* ===========================================================================
+     If the default memLevel or windowBits for deflateInit() is changed, then
+   this function needs to be updated.
+ */
+uLong ZEXPORT compressBound (sourceLen)
+    uLong sourceLen;
+{
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
+           (sourceLen >> 25) + 13;
+}
diff --git a/c-blosc/internal-complibs/zlib-1.2.8/crc32.c b/c-blosc/internal-complibs/zlib-1.2.8/crc32.c
new file mode 100644
index 0000000..979a719
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.2.8/crc32.c
@@ -0,0 +1,425 @@
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors.  This results in about a
+ * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+/* @(#) $Id$ */
+
+/*
+  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
+  protection on the static variables used to control the first-use generation
+  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+  first call get_crc_table() to initialize the tables before allowing more than
+  one thread to use crc32().
+
+  DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h.
+ */
+
+#ifdef MAKECRCH
+#  include <stdio.h>
+#  ifndef DYNAMIC_CRC_TABLE
+#    define DYNAMIC_CRC_TABLE
+#  endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+#include "zutil.h"      /* for STDC and FAR definitions */
+
+#define local static
+
+/* Definitions for doing the crc four data bytes at a time. */
+#if !defined(NOBYFOUR) && defined(Z_U4)
+#  define BYFOUR
+#endif
+#ifdef BYFOUR
+   local unsigned long crc32_little OF((unsigned long,
+                        const unsigned char FAR *, unsigned));
+   local unsigned long crc32_big OF((unsigned long,
+                        const unsigned char FAR *, unsigned));
+#  define TBLS 8
+#else
+#  define TBLS 1
+#endif /* BYFOUR */
+
+/* Local functions for crc concatenation */
+local unsigned long gf2_matrix_times OF((unsigned long *mat,
+                                         unsigned long vec));
+local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
+local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
+
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local volatile int crc_table_empty = 1;
+local z_crc_t FAR crc_table[TBLS][256];
+local void make_crc_table OF((void));
+#ifdef MAKECRCH
+   local void write_table OF((FILE *, const z_crc_t FAR *));
+#endif /* MAKECRCH */
+/*
+  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
+  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+  Polynomials over GF(2) are represented in binary, one bit per coefficient,
+  with the lowest powers in the most significant bit.  Then adding polynomials
+  is just exclusive-or, and multiplying a polynomial by x is a right shift by
+  one.
If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. + + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + z_crc_t c; + int n, k; + z_crc_t poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0; + for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) + poly |= (z_crc_t)1 << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (z_crc_t)n; + for (k = 0; k < 8; k++) + c = c & 1 ? poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = ZSWAP32(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = ZSWAP32(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const z_crc_t FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const z_crc_t FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == 255 ? "\n" : (n % 5 == 4 ? 
",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const z_crc_t FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const z_crc_t FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + uInt len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + z_crc_t endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +#ifdef BYFOUR + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = (z_crc_t)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *++buf4; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register z_crc_t c; + register const z_crc_t FAR *buf4; + + c = ZSWAP32((z_crc_t)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; + buf4--; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf4++; + buf = (const unsigned char FAR *)buf4; + + if (len) do { + 
c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(ZSWAP32(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; + + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +local uLong crc32_combine_(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case (also disallow negative lengths) */ + if (len2 <= 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} + +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/crc32.h b/c-blosc/internal-complibs/zlib-1.2.8/crc32.h new file mode 100644 index 0000000..9e0c778 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/crc32.h @@ -0,0 +1,441 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const z_crc_t FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 
0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 
0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 
0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 
0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 
0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 
0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 
0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 
0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 
0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff --git a/c-blosc/internal-complibs/zlib-1.2.8/deflate.c b/c-blosc/internal-complibs/zlib-1.2.8/deflate.c new file mode 100644 index 0000000..6969577 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/deflate.c @@ -0,0 +1,1967 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://tools.ietf.org/html/rfc1951 + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. 
+ */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. */ + +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local block_state deflate_rle OF((deflate_state *s, int flush)); +local block_state deflate_huff OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif + +#ifdef DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. 
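+ *
+ * For illustration (a sketch, not part of the original source): the four
+ * fields of an entry can also be overridden per stream with deflateTune(),
+ * whose definition later in this file simply stores the values given:
+ *
+ *     deflateTune(&strm, 8, 32, 128, 1024);    good, lazy, nice, chain
+ *
+ * The values shown are hypothetical, mixing the level 7 and level 8
+ * entries of the table above.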
+ */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +#ifndef NO_DUMMY_DECL +struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ +#endif + +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) << 1) - ((f) > 4 ? 9 : 0)) + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits.
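+ *
+ * Usage sketch for this function (illustrative; error handling omitted):
+ * windowBits selects the wrapper exactly as the checks below implement it,
+ * with 8..15 giving a zlib wrapper, a negative value raw deflate, and an
+ * extra 16 a gzip wrapper:
+ *
+ *     z_stream strm;
+ *     strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL;
+ *     if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+ *                      15 + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK)
+ *         handle the error;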
+ */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->high_water = 0; /* nothing written to s->window yet */ + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg = ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; + + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL) + return Z_STREAM_ERROR; + s = strm->state; + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; + + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; + } + + /* 
insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); + } + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateResetKeep (strm) + z_streamp strm; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (strm->state->wrap != 2) return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePending (strm, pending, bits) + unsigned *pending; + int *bits; + z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + deflate_state *s; + int put; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + int err = Z_OK; + + if (strm 
== Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && + strm->total_in != 0) { + /* Flush the last buffer: */ + err = deflate(strm, Z_BLOCK); + if (err == Z_BUF_ERROR && s->pending == 0) + err = Z_OK; + } + if (s->level != level) { + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return err; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = good_length; + s->max_lazy_match = max_lazy; + s->nice_match = nice_length; + s->max_chain_length = max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. + * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds for + * every combination of windowBits and memLevel. But even the conservative + * upper bound of about 14% expansion does not seem onerous for output buffer + * allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong complen, wraplen; + Bytef *str; + + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; + + /* if can't get parameters, return conservative bound plus zlib wrapper */ + if (strm == Z_NULL || strm->state == Z_NULL) + return complen + 6; + + /* compute wrapper length */ + s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 
4 : 0); + break; + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return conservative bound */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return complen + wraplen; + + /* default settings: return tight bound for that case */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output goes + * through this function so some applications may wish to modify it + * to avoid allocating a large strm->next_out buffer and copying into it. + * (See also read_buf()). + */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len; + deflate_state *s = strm->state; + + _tr_flush_bits(s); + len = s->pending; + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + flush > Z_BLOCK || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + s->strm = strm; /* just in case */ + old_flush = s->last_flush; + s->last_flush = flush; + + /* Write the header */ + if (s->status == INIT_STATE) { +#ifdef GZIP + if (s->wrap == 2) { + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == Z_NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 
0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != Z_NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } + } + else +#endif + { + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + s->status = BUSY_STATE; + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + } + } +#ifdef GZIP + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != Z_NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + + while (s->gzindex < (s->gzhead->extra_len & 0xffff)) { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) + break; + } + put_byte(s, s->gzhead->extra[s->gzindex]); + s->gzindex++; + } + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (s->gzindex == s->gzhead->extra_len) { + s->gzindex = 0; + s->status = NAME_STATE; + } + } + else + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != Z_NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + int val; + + do { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) { + val = 1; + break; + } + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (val == 0) { + s->gzindex = 0; + s->status = COMMENT_STATE; + } + } + else + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != Z_NULL) { + uInt beg = s->pending; /* start of bytes to update crc */ + int val; + + do { + if (s->pending == s->pending_buf_size) { + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + flush_pending(strm); + beg = s->pending; + if (s->pending == s->pending_buf_size) { + val = 1; + break; + } + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + if (s->gzhead->hcrc && s->pending > beg) + strm->adler = crc32(strm->adler, s->pending_buf + beg, + s->pending - beg); + if (val == 0) + s->status = HCRC_STATE; + } + else + s->status = HCRC_STATE; + } + if 
(s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) + flush_pending(strm); + if (s->pending + 2 <= s->pending_buf_size) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + } + } + else + s->status = BUSY_STATE; + } +#endif + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + (s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush)); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). 
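+ *
+ * From the caller's side (an illustrative sketch; msg, out and their
+ * lengths are assumed), a byte-aligned, decodable boundary is obtained
+ * with
+ *
+ *     strm.next_in = msg;   strm.avail_in = msg_len;
+ *     strm.next_out = out;  strm.avail_out = out_len;
+ *     deflate(&strm, Z_SYNC_FLUSH);
+ *
+ * which emits the empty stored block described here; Z_FULL_FLUSH does the
+ * same and additionally forgets the history (CLEAR_HASH below), so that
+ * decompression can restart at this point, at some cost in ratio.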
+ */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + Assert(strm->avail_out > 0, "bug2"); + + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ +#ifdef GZIP + if (s->wrap == 2) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else +#endif + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + + status = strm->state->status; + if (status != INIT_STATE && + status != EXTRA_STATE && + status != NAME_STATE && + status != COMMENT_STATE && + status != HCRC_STATE && + status != BUSY_STATE && + status != FINISH_STATE) { + return Z_STREAM_ERROR; + } + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). 
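+ *
+ * One possible use (an illustrative sketch, not prescribed by zlib):
+ * snapshot a stream so that several strategies can be tried from the same
+ * starting state:
+ *
+ *     z_stream snap;
+ *     if (deflateCopy(&snap, &strm) == Z_OK) {
+ *         try one strategy on strm ...
+ *         deflateEnd(&strm);
+ *         strm = snap;             resume from the snapshot instead
+ *     }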
+ */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* MAXSEG_64K */ +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). 
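+ *
+ * In terms of the public API, this is what lets input arrive in pieces of
+ * any size (an illustrative sketch; buf, out, src and dst are assumed):
+ *
+ *     size_t n;
+ *     while ((n = fread(buf, 1, sizeof(buf), src)) > 0) {
+ *         strm.next_in = buf;   strm.avail_in = (uInt)n;
+ *         do {
+ *             strm.next_out = out;  strm.avail_out = sizeof(out);
+ *             deflate(&strm, Z_NO_FLUSH);
+ *             fwrite(out, 1, sizeof(out) - strm.avail_out, dst);
+ *         } while (strm.avail_out == 0);
+ *     }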
+ */ +local int read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, buf, len); + } +#endif + strm->next_in += len; + strm->total_in += len; + + return (int)len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->insert = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifndef FASTEST +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +#endif +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +#ifndef ASMV +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. 
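+ *
+ * Stripped of the unrolled comparisons and heuristics, the search below is
+ * a bounded walk of one hash chain (a sketch; the prefix-length step is
+ * written in words):
+ *
+ *     cur = cur_match;                          head of the hash chain
+ *     do {
+ *         len = length of the common prefix of window+cur and
+ *               window+strstart;
+ *         if (len > best_len) { best_len = len; s->match_start = cur; }
+ *     } while ((cur = prev[cur & wmask]) > limit && --chain_length != 0);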
+ */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. 
+ */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} +#endif /* ASMV */ + +#else /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for FASTEST only + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; +} + +#endif /* FASTEST */ + +#ifdef DEBUG +/* =========================================================================== + * Check that the match at match_start is indeed a match. 
+ */ +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif /* DEBUG */ + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + register unsigned n, m; + register Posf *p; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + + /* Slide the hash table (could be avoided with 32 bit values + at the expense of memory usage). We slide even when level == 0 + to keep the hash table consistent if we switch back to level > 0 + later. (Using level 0 permanently is not an optimal usage of + zlib, so we don't care about this pathological case.) + */ + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + } while (--n); + + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif + more += wsize; + } + if (s->strm->avail_in == 0) break; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 
+ */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; + + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less. + */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, last) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (last)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ +} + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * This function does not insert new strings in the dictionary since + * uncompressible data is probably not useful. This function is used + * only for the level=0 compression option. + * NOTE: this function should be optimized to avoid extra copying from + * window to pending_buf. 
+ */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Stored blocks are limited to 0xffff bytes, pending_buf is limited + * to pending_buf_size, and each stored block has a 5 byte header: + */ + ulg max_block_size = 0xffff; + ulg max_start; + + if (max_block_size > s->pending_buf_size - 5) { + max_block_size = s->pending_buf_size - 5; + } + + /* Copy as much as possible from input to output: */ + for (;;) { + /* Fill the window as much as possible: */ + if (s->lookahead <= 1) { + + Assert(s->strstart < s->w_size+MAX_DIST(s) || + s->block_start >= (long)s->w_size, "slide too late"); + + fill_window(s); + if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + + if (s->lookahead == 0) break; /* flush the current block */ + } + Assert(s->block_start >= 0L, "block gone"); + + s->strstart += s->lookahead; + s->lookahead = 0; + + /* Emit a stored block if pending_buf will be full: */ + max_start = s->block_start + max_block_size; + if (s->strstart == 0 || (ulg)s->strstart >= max_start) { + /* strstart == 0 is possible when wraparound on 16-bit machine */ + s->lookahead = (uInt)(s->strstart - max_start); + s->strstart = (uInt)max_start; + FLUSH_BLOCK(s, 0); + } + /* Flush if we may have to slide, otherwise block_start may become + * negative and the data will be gone: + */ + if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { + FLUSH_BLOCK(s, 0); + } + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if ((long)s->strstart > s->block_start) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} +#endif /* FASTEST */ + +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest run, plus one for the unrolled loop. 
+ */ + if (s->lookahead <= MAX_MATCH) { + fill_window(s); + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { + scan = s->window + s->strstart - 1; + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (int)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/deflate.h b/c-blosc/internal-complibs/zlib-1.2.8/deflate.h new file mode 100644 index 0000000..ce0299e --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/deflate.h @@ -0,0 +1,346 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2012 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. 
*/ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + +#define INIT_STATE 42 +#define EXTRA_STATE 69 +#define NAME_STATE 73 +#define COMMENT_STATE 91 +#define HCRC_STATE 103 +#define BUSY_STATE 113 +#define FINISH_STATE 666 +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. + */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + uInt pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + uInt gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. 
+ */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) 
+ * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ + +#ifdef DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + /* in trees.c */ +void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); +int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); +void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. 
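+ * (For dist >= 256 the index below is 256+(dist>>7), and dist>>7 is at
+ * least 2 in that range, which is why entries 256 and 257 go unused.)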
+ */ + +#ifndef DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; +#else + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (length); \ + ush dist = (distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* DEFLATE_H */ diff --git a/c-blosc/internal-complibs/zlib-1.2.8/gzclose.c b/c-blosc/internal-complibs/zlib-1.2.8/gzclose.c new file mode 100644 index 0000000..caeb99a --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/gzclose.c @@ -0,0 +1,25 @@ +/* gzclose.c -- zlib gzclose() function + * Copyright (C) 2004, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* gzclose() is in a separate file so that it is linked in only if it is used. + That way the other gzclose functions can be used instead to avoid linking in + unneeded compression or decompression routines. */ +int ZEXPORT gzclose(file) + gzFile file; +{ +#ifndef NO_GZCOMPRESS + gz_statep state; + + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + return state->mode == GZ_READ ? 
gzclose_r(file) : gzclose_w(file);
+#else
+ return gzclose_r(file);
+#endif
+}
diff --git a/c-blosc/internal-complibs/zlib-1.2.8/gzguts.h b/c-blosc/internal-complibs/zlib-1.2.8/gzguts.h
new file mode 100644
index 0000000..d87659d
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.2.8/gzguts.h
@@ -0,0 +1,209 @@
+/* gzguts.h -- zlib internal header definitions for gz* operations
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef _LARGEFILE64_SOURCE
+# ifndef _LARGEFILE_SOURCE
+# define _LARGEFILE_SOURCE 1
+# endif
+# ifdef _FILE_OFFSET_BITS
+# undef _FILE_OFFSET_BITS
+# endif
+#endif
+
+#ifdef HAVE_HIDDEN
+# define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+# define ZLIB_INTERNAL
+#endif
+
+#include <stdio.h>
+#include "zlib.h"
+#ifdef STDC
+# include <string.h>
+# include <stdlib.h>
+# include <limits.h>
+#endif
+#include <fcntl.h>
+
+#ifdef _WIN32
+# include <stddef.h>
+#endif
+
+#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
+# include <io.h>
+#endif
+
+#ifdef WINAPI_FAMILY
+# define open _open
+# define read _read
+# define write _write
+# define close _close
+#endif
+
+#ifdef NO_DEFLATE /* for compatibility with old definition */
+# define NO_GZCOMPRESS
+#endif
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+# ifndef HAVE_VSNPRINTF
+# define HAVE_VSNPRINTF
+# endif
+#endif
+
+#if defined(__CYGWIN__)
+# ifndef HAVE_VSNPRINTF
+# define HAVE_VSNPRINTF
+# endif
+#endif
+
+#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
+# ifndef HAVE_VSNPRINTF
+# define HAVE_VSNPRINTF
+# endif
+#endif
+
+#ifndef HAVE_VSNPRINTF
+# ifdef MSDOS
+/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+ but for now we just assume it doesn't. */
+# define NO_vsnprintf
+# endif
+# ifdef __TURBOC__
+# define NO_vsnprintf
+# endif
+# ifdef WIN32
+/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf.
*/
+# if !defined(vsnprintf) && !defined(NO_vsnprintf)
+# if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
+# define vsnprintf _vsnprintf
+# endif
+# endif
+# endif
+# ifdef __SASC
+# define NO_vsnprintf
+# endif
+# ifdef VMS
+# define NO_vsnprintf
+# endif
+# ifdef __OS400__
+# define NO_vsnprintf
+# endif
+# ifdef __MVS__
+# define NO_vsnprintf
+# endif
+#endif
+
+/* unlike snprintf (which is required in C99, yet still not supported by
+ Microsoft more than a decade later!), _snprintf does not guarantee null
+ termination of the result -- however this is only used in gzlib.c where
+ the result is assured to fit in the space provided */
+#ifdef _MSC_VER
+# define snprintf _snprintf
+#endif
+
+#ifndef local
+# define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+/* gz* functions always use library allocation functions */
+#ifndef STDC
+ extern voidp malloc OF((uInt size));
+ extern void free OF((voidpf ptr));
+#endif
+
+/* get errno and strerror definition */
+#if defined UNDER_CE
+# include <windows.h>
+# define zstrerror() gz_strwinerror((DWORD)GetLastError())
+#else
+# ifndef NO_STRERROR
+# include <errno.h>
+# define zstrerror() strerror(errno)
+# else
+# define zstrerror() "stdio error (consult errno)"
+# endif
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
+ ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+ ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+ ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+ ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+#endif
+
+/* default memLevel */
+#if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+#else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+
+/* default i/o buffer size -- double this for output when reading (this and
+ twice this must be able to fit in an unsigned type) */
+#define GZBUFSIZE 8192
+
+/* gzip modes, also provide a little integrity check on the passed structure */
+#define GZ_NONE 0
+#define GZ_READ 7247
+#define GZ_WRITE 31153
+#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */
+
+/* values for gz_state how */
+#define LOOK 0 /* look for a gzip header */
+#define COPY 1 /* copy input directly */
+#define GZIP 2 /* decompress a gzip stream */
+
+/* internal gzip file state data structure */
+typedef struct {
+ /* exposed contents for gzgetc() macro */
+ struct gzFile_s x; /* "x" for exposed */
+ /* x.have: number of bytes available at x.next */
+ /* x.next: next output data to deliver or write */
+ /* x.pos: current position in uncompressed data */
+ /* used for both reading and writing */
+ int mode; /* see gzip modes above */
+ int fd; /* file descriptor */
+ char *path; /* path or fd for error messages */
+ unsigned size; /* buffer size, zero if not allocated yet */
+ unsigned want; /* requested buffer size, default is GZBUFSIZE */
+ unsigned char *in; /* input buffer */
+ unsigned char *out; /* output buffer (double-sized when reading) */
+ int direct; /* 0 if processing gzip, 1 if transparent */
+ /* just for reading */
+ int how; /* 0: get header, 1: copy, 2: decompress */
+ z_off64_t start; /* where the gzip data started, for rewinding */
+ int eof; /* true if end of input file reached */
+ int past; /* true if read requested past end */
+ /* just for writing */
+ int level; /* compression level */
+ int strategy; /* compression strategy */
+ /* seek request */
+ z_off64_t skip; /* amount to skip (already rewound if
backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +#ifdef INT_MAX +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) +#else +unsigned ZLIB_INTERNAL gz_intmax OF((void)); +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) +#endif diff --git a/c-blosc/internal-complibs/zlib-1.2.8/gzlib.c b/c-blosc/internal-complibs/zlib-1.2.8/gzlib.c new file mode 100644 index 0000000..fae202e --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/gzlib.c @@ -0,0 +1,634 @@ +/* gzlib.c -- zlib functions common to reading and writing gzip files + * Copyright (C) 2004, 2010, 2011, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +#if defined(_WIN32) && !defined(__BORLANDC__) +# define LSEEK _lseeki64 +#else +#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 +# define LSEEK lseek64 +#else +# define LSEEK lseek +#endif +#endif + +/* Local functions */ +local void gz_reset OF((gz_statep)); +local gzFile gz_open OF((const void *, int, const char *)); + +#if defined UNDER_CE + +/* Map the Windows error number in ERROR to a locale-dependent error message + string and return a pointer to it. Typically, the values for ERROR come + from GetLastError. + + The string pointed to shall not be modified by the application, but may be + overwritten by a subsequent call to gz_strwinerror + + The gz_strwinerror function does not change the current setting of + GetLastError. */ +char ZLIB_INTERNAL *gz_strwinerror (error) + DWORD error; +{ + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +#endif /* UNDER_CE */ + +/* Reset gzip file state */ +local void gz_reset(state) + gz_statep state; +{ + state->x.have = 0; /* no output data available */ + if (state->mode == GZ_READ) { /* for reading ... */ + state->eof = 0; /* not at end of file */ + state->past = 0; /* have not read past end yet */ + state->how = LOOK; /* look for gzip header */ + } + state->seek = 0; /* no seek request pending */ + gz_error(state, Z_OK, NULL); /* clear error */ + state->x.pos = 0; /* no uncompressed data yet */ + state->strm.avail_in = 0; /* no input data yet */ +} + +/* Open a gzip file either by name or file descriptor. 
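+ A descriptor of -1 means open path by name, a non-negative descriptor
+ comes from gzdopen(), and -2 (used by gzopen_w() on Windows) marks path
+ as a wide-character string.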
*/ +local gzFile gz_open(path, fd, mode) + const void *path; + int fd; + const char *mode; +{ + gz_statep state; + size_t len; + int oflag; +#ifdef O_CLOEXEC + int cloexec = 0; +#endif +#ifdef O_EXCL + int exclusive = 0; +#endif + + /* check input */ + if (path == NULL) + return NULL; + + /* allocate gzFile structure to return */ + state = (gz_statep)malloc(sizeof(gz_state)); + if (state == NULL) + return NULL; + state->size = 0; /* no buffers allocated yet */ + state->want = GZBUFSIZE; /* requested buffer size */ + state->msg = NULL; /* no error message yet */ + + /* interpret mode */ + state->mode = GZ_NONE; + state->level = Z_DEFAULT_COMPRESSION; + state->strategy = Z_DEFAULT_STRATEGY; + state->direct = 0; + while (*mode) { + if (*mode >= '0' && *mode <= '9') + state->level = *mode - '0'; + else + switch (*mode) { + case 'r': + state->mode = GZ_READ; + break; +#ifndef NO_GZCOMPRESS + case 'w': + state->mode = GZ_WRITE; + break; + case 'a': + state->mode = GZ_APPEND; + break; +#endif + case '+': /* can't read and write at the same time */ + free(state); + return NULL; + case 'b': /* ignore -- will request binary anyway */ + break; +#ifdef O_CLOEXEC + case 'e': + cloexec = 1; + break; +#endif +#ifdef O_EXCL + case 'x': + exclusive = 1; + break; +#endif + case 'f': + state->strategy = Z_FILTERED; + break; + case 'h': + state->strategy = Z_HUFFMAN_ONLY; + break; + case 'R': + state->strategy = Z_RLE; + break; + case 'F': + state->strategy = Z_FIXED; + break; + case 'T': + state->direct = 1; + break; + default: /* could consider as an error, but just ignore */ + ; + } + mode++; + } + + /* must provide an "r", "w", or "a" */ + if (state->mode == GZ_NONE) { + free(state); + return NULL; + } + + /* can't force transparent read */ + if (state->mode == GZ_READ) { + if (state->direct) { + free(state); + return NULL; + } + state->direct = 1; /* for empty file */ + } + + /* save the path name for error messages */ +#ifdef _WIN32 + if (fd == -2) { + len = wcstombs(NULL, path, 0); + if (len == (size_t)-1) + len = 0; + } + else +#endif + len = strlen((const char *)path); + state->path = (char *)malloc(len + 1); + if (state->path == NULL) { + free(state); + return NULL; + } +#ifdef _WIN32 + if (fd == -2) + if (len) + wcstombs(state->path, path, len + 1); + else + *(state->path) = 0; + else +#endif +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(state->path, len + 1, "%s", (const char *)path); +#else + strcpy(state->path, path); +#endif + + /* compute the flags for open() */ + oflag = +#ifdef O_LARGEFILE + O_LARGEFILE | +#endif +#ifdef O_BINARY + O_BINARY | +#endif +#ifdef O_CLOEXEC + (cloexec ? O_CLOEXEC : 0) | +#endif + (state->mode == GZ_READ ? + O_RDONLY : + (O_WRONLY | O_CREAT | +#ifdef O_EXCL + (exclusive ? O_EXCL : 0) | +#endif + (state->mode == GZ_WRITE ? + O_TRUNC : + O_APPEND))); + + /* open the file with the appropriate flags (or just use fd) */ + state->fd = fd > -1 ? fd : ( +#ifdef _WIN32 + fd == -2 ? 
_wopen(path, oflag, 0666) :
+#endif
+ open((const char *)path, oflag, 0666));
+ if (state->fd == -1) {
+ free(state->path);
+ free(state);
+ return NULL;
+ }
+ if (state->mode == GZ_APPEND)
+ state->mode = GZ_WRITE; /* simplify later checks */
+
+ /* save the current position for rewinding (only if reading) */
+ if (state->mode == GZ_READ) {
+ state->start = LSEEK(state->fd, 0, SEEK_CUR);
+ if (state->start == -1) state->start = 0;
+ }
+
+ /* initialize stream */
+ gz_reset(state);
+
+ /* return stream */
+ return (gzFile)state;
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen(path, mode)
+ const char *path;
+ const char *mode;
+{
+ return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen64(path, mode)
+ const char *path;
+ const char *mode;
+{
+ return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzdopen(fd, mode)
+ int fd;
+ const char *mode;
+{
+ char *path; /* identifier for error messages */
+ gzFile gz;
+
+ if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL)
+ return NULL;
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+ snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd); /* for debugging */
+#else
+ sprintf(path, "<fd:%d>", fd); /* for debugging */
+#endif
+ gz = gz_open(path, fd, mode);
+ free(path);
+ return gz;
+}
+
+/* -- see zlib.h -- */
+#ifdef _WIN32
+gzFile ZEXPORT gzopen_w(path, mode)
+ const wchar_t *path;
+ const char *mode;
+{
+ return gz_open(path, -2, mode);
+}
+#endif
+
+/* -- see zlib.h -- */
+int ZEXPORT gzbuffer(file, size)
+ gzFile file;
+ unsigned size;
+{
+ gz_statep state;
+
+ /* get internal structure and check integrity */
+ if (file == NULL)
+ return -1;
+ state = (gz_statep)file;
+ if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+ return -1;
+
+ /* make sure we haven't already allocated memory */
+ if (state->size != 0)
+ return -1;
+
+ /* check and set requested size */
+ if (size < 2)
+ size = 2; /* need two bytes to check magic header */
+ state->want = size;
+ return 0;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzrewind(file)
+ gzFile file;
+{
+ gz_statep state;
+
+ /* get internal structure */
+ if (file == NULL)
+ return -1;
+ state = (gz_statep)file;
+
+ /* check that we're reading and that there's no error */
+ if (state->mode != GZ_READ ||
+ (state->err != Z_OK && state->err != Z_BUF_ERROR))
+ return -1;
+
+ /* back up and start over */
+ if (LSEEK(state->fd, state->start, SEEK_SET) == -1)
+ return -1;
+ gz_reset(state);
+ return 0;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gzseek64(file, offset, whence)
+ gzFile file;
+ z_off64_t offset;
+ int whence;
+{
+ unsigned n;
+ z_off64_t ret;
+ gz_statep state;
+
+ /* get internal structure and check integrity */
+ if (file == NULL)
+ return -1;
+ state = (gz_statep)file;
+ if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+ return -1;
+
+ /* check that there's no error */
+ if (state->err != Z_OK && state->err != Z_BUF_ERROR)
+ return -1;
+
+ /* can only seek from start or relative to current position */
+ if (whence != SEEK_SET && whence != SEEK_CUR)
+ return -1;
+
+ /* normalize offset to a SEEK_CUR specification */
+ if (whence == SEEK_SET)
+ offset -= state->x.pos;
+ else if (state->seek)
+ offset += state->skip;
+ state->seek = 0;
+
+ /* if within raw area while reading, just go there */
+ if (state->mode == GZ_READ && state->how == COPY &&
+ state->x.pos + offset >= 0) {
+ ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR);
+ if (ret == -1)
+ return -1;
+ state->x.have = 0;
+ state->eof = 0;
+ state->past = 0;
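+ /* the raw seek succeeded -- discard the pending seek request and any
+ error, drop buffered input, and advance the logical position */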
+ state->seek = 0; + gz_error(state, Z_OK, NULL); + state->strm.avail_in = 0; + state->x.pos += offset; + return state->x.pos; + } + + /* calculate skip amount, rewinding if needed for back seek when reading */ + if (offset < 0) { + if (state->mode != GZ_READ) /* writing -- can't go backwards */ + return -1; + offset += state->x.pos; + if (offset < 0) /* before start of file! */ + return -1; + if (gzrewind(file) == -1) /* rewind, then skip to offset */ + return -1; + } + + /* if reading, skip what's in output buffer (one less gzgetc() check) */ + if (state->mode == GZ_READ) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ? + (unsigned)offset : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + offset -= n; + } + + /* request skip (if not zero) */ + if (offset) { + state->seek = 1; + state->skip = offset; + } + return state->x.pos + offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzseek(file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + z_off64_t ret; + + ret = gzseek64(file, (z_off64_t)offset, whence); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gztell64(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* return position */ + return state->x.pos + (state->seek ? state->skip : 0); +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gztell(file) + gzFile file; +{ + z_off64_t ret; + + ret = gztell64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzoffset64(file) + gzFile file; +{ + z_off64_t offset; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* compute and return effective offset in file */ + offset = LSEEK(state->fd, 0, SEEK_CUR); + if (offset == -1) + return -1; + if (state->mode == GZ_READ) /* reading */ + offset -= state->strm.avail_in; /* don't count buffered input */ + return offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzoffset(file) + gzFile file; +{ + z_off64_t ret; + + ret = gzoffset64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzeof(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return 0; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return 0; + + /* return end-of-file state */ + return state->mode == GZ_READ ? state->past : 0; +} + +/* -- see zlib.h -- */ +const char * ZEXPORT gzerror(file, errnum) + gzFile file; + int *errnum; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return NULL; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return NULL; + + /* return error information */ + if (errnum != NULL) + *errnum = state->err; + return state->err == Z_MEM_ERROR ? "out of memory" : + (state->msg == NULL ? 
"" : state->msg); +} + +/* -- see zlib.h -- */ +void ZEXPORT gzclearerr(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return; + + /* clear error and end-of-file */ + if (state->mode == GZ_READ) { + state->eof = 0; + state->past = 0; + } + gz_error(state, Z_OK, NULL); +} + +/* Create an error message in allocated memory and set state->err and + state->msg accordingly. Free any previous error message already there. Do + not try to free or allocate space if the error is Z_MEM_ERROR (out of + memory). Simply save the error message as a static string. If there is an + allocation failure constructing the error message, then convert the error to + out of memory. */ +void ZLIB_INTERNAL gz_error(state, err, msg) + gz_statep state; + int err; + const char *msg; +{ + /* free previously allocated message and clear */ + if (state->msg != NULL) { + if (state->err != Z_MEM_ERROR) + free(state->msg); + state->msg = NULL; + } + + /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */ + if (err != Z_OK && err != Z_BUF_ERROR) + state->x.have = 0; + + /* set error code, and if no message, then done */ + state->err = err; + if (msg == NULL) + return; + + /* for an out of memory error, return literal string when requested */ + if (err == Z_MEM_ERROR) + return; + + /* construct error message with path */ + if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) == + NULL) { + state->err = Z_MEM_ERROR; + return; + } +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, + "%s%s%s", state->path, ": ", msg); +#else + strcpy(state->msg, state->path); + strcat(state->msg, ": "); + strcat(state->msg, msg); +#endif + return; +} + +#ifndef INT_MAX +/* portably return maximum value for an int (when limits.h presumed not + available) -- we need to do this to cover cases where 2's complement not + used, since C standard permits 1's complement and sign-bit representations, + otherwise we could just use ((unsigned)-1) >> 1 */ +unsigned ZLIB_INTERNAL gz_intmax() +{ + unsigned p, q; + + p = 1; + do { + q = p; + p <<= 1; + p++; + } while (p > q); + return q >> 1; +} +#endif diff --git a/c-blosc/internal-complibs/zlib-1.2.8/gzread.c b/c-blosc/internal-complibs/zlib-1.2.8/gzread.c new file mode 100644 index 0000000..bf4538e --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/gzread.c @@ -0,0 +1,594 @@ +/* gzread.c -- zlib functions for reading gzip files + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); +local int gz_avail OF((gz_statep)); +local int gz_look OF((gz_statep)); +local int gz_decomp OF((gz_statep)); +local int gz_fetch OF((gz_statep)); +local int gz_skip OF((gz_statep, z_off64_t)); + +/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from + state->fd, and update state->eof, state->err, and state->msg as appropriate. + This function needs to loop on read(), since read() is not guaranteed to + read the number of bytes requested, depending on the type of descriptor. 
*/ +local int gz_load(state, buf, len, have) + gz_statep state; + unsigned char *buf; + unsigned len; + unsigned *have; +{ + int ret; + + *have = 0; + do { + ret = read(state->fd, buf + *have, len - *have); + if (ret <= 0) + break; + *have += ret; + } while (*have < len); + if (ret < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + if (ret == 0) + state->eof = 1; + return 0; +} + +/* Load up input buffer and set eof flag if last data loaded -- return -1 on + error, 0 otherwise. Note that the eof flag is set when the end of the input + file is reached, even though there may be unused data in the buffer. Once + that data has been used, no more attempts will be made to read the file. + If strm->avail_in != 0, then the current data is moved to the beginning of + the input buffer, and then the remainder of the buffer is loaded with the + available data from the input file. */ +local int gz_avail(state) + gz_statep state; +{ + unsigned got; + z_streamp strm = &(state->strm); + + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + if (state->eof == 0) { + if (strm->avail_in) { /* copy what's there to the start */ + unsigned char *p = state->in; + unsigned const char *q = strm->next_in; + unsigned n = strm->avail_in; + do { + *p++ = *q++; + } while (--n); + } + if (gz_load(state, state->in + strm->avail_in, + state->size - strm->avail_in, &got) == -1) + return -1; + strm->avail_in += got; + strm->next_in = state->in; + } + return 0; +} + +/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. + If this is the first time in, allocate required memory. state->how will be + left unchanged if there is no more input data available, will be set to COPY + if there is no gzip header and direct copying will be performed, or it will + be set to GZIP for decompression. If direct copying, then leftover input + data from the input buffer will be copied to the output buffer. In that + case, all further file reads will be directly to either the output buffer or + a user buffer. If decompressing, the inflate state will be initialized. + gz_look() will return 0 on success or -1 on failure. 
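+ (The gzip magic bytes tested below are 31 and 139, that is 0x1f 0x8b.)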
*/ +local int gz_look(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + /* allocate read buffers and inflate memory */ + if (state->size == 0) { + /* allocate buffers */ + state->in = (unsigned char *)malloc(state->want); + state->out = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL || state->out == NULL) { + if (state->out != NULL) + free(state->out); + if (state->in != NULL) + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + state->size = state->want; + + /* allocate inflate memory */ + state->strm.zalloc = Z_NULL; + state->strm.zfree = Z_NULL; + state->strm.opaque = Z_NULL; + state->strm.avail_in = 0; + state->strm.next_in = Z_NULL; + if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ + free(state->out); + free(state->in); + state->size = 0; + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + } + + /* get at least the magic bytes in the input buffer */ + if (strm->avail_in < 2) { + if (gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) + return 0; + } + + /* look for gzip magic bytes -- if there, do gzip decoding (note: there is + a logical dilemma here when considering the case of a partially written + gzip file, to wit, if a single 31 byte is written, then we cannot tell + whether this is a single-byte file, or just a partially written gzip + file -- for here we assume that if a gzip file is being written, then + the header will be written in a single operation, so that reading a + single byte is sufficient indication that it is not a gzip file) */ + if (strm->avail_in > 1 && + strm->next_in[0] == 31 && strm->next_in[1] == 139) { + inflateReset(strm); + state->how = GZIP; + state->direct = 0; + return 0; + } + + /* no gzip header -- if we were decoding gzip before, then this is trailing + garbage. Ignore the trailing garbage and finish. */ + if (state->direct == 0) { + strm->avail_in = 0; + state->eof = 1; + state->x.have = 0; + return 0; + } + + /* doing raw i/o, copy any leftover input to output -- this assumes that + the output buffer is larger than the input buffer, which also assures + space for gzungetc() */ + state->x.next = state->out; + if (strm->avail_in) { + memcpy(state->x.next, strm->next_in, strm->avail_in); + state->x.have = strm->avail_in; + strm->avail_in = 0; + } + state->how = COPY; + state->direct = 1; + return 0; +} + +/* Decompress from input to the provided next_out and avail_out in the state. + On return, state->x.have and state->x.next point to the just decompressed + data. If the gzip stream completes, state->how is reset to LOOK to look for + the next gzip stream or raw data, once state->x.have is depleted. Returns 0 + on success, -1 on failure. 
*/ +local int gz_decomp(state) + gz_statep state; +{ + int ret = Z_OK; + unsigned had; + z_streamp strm = &(state->strm); + + /* fill output buffer up to end of deflate stream */ + had = strm->avail_out; + do { + /* get more input for inflate() */ + if (strm->avail_in == 0 && gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) { + gz_error(state, Z_BUF_ERROR, "unexpected end of file"); + break; + } + + /* decompress and handle errors */ + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { + gz_error(state, Z_STREAM_ERROR, + "internal error: inflate stream corrupt"); + return -1; + } + if (ret == Z_MEM_ERROR) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ + gz_error(state, Z_DATA_ERROR, + strm->msg == NULL ? "compressed data error" : strm->msg); + return -1; + } + } while (strm->avail_out && ret != Z_STREAM_END); + + /* update available output */ + state->x.have = had - strm->avail_out; + state->x.next = strm->next_out - state->x.have; + + /* if the gzip stream completed successfully, look for another */ + if (ret == Z_STREAM_END) + state->how = LOOK; + + /* good decompression */ + return 0; +} + +/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. + Data is either copied from the input file or decompressed from the input + file depending on state->how. If state->how is LOOK, then a gzip header is + looked for to determine whether to copy or decompress. Returns -1 on error, + otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the + end of the input file has been reached and all data has been processed. */ +local int gz_fetch(state) + gz_statep state; +{ + z_streamp strm = &(state->strm); + + do { + switch(state->how) { + case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ + if (gz_look(state) == -1) + return -1; + if (state->how == LOOK) + return 0; + break; + case COPY: /* -> COPY */ + if (gz_load(state, state->out, state->size << 1, &(state->x.have)) + == -1) + return -1; + state->x.next = state->out; + return 0; + case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ + strm->avail_out = state->size << 1; + strm->next_out = state->out; + if (gz_decomp(state) == -1) + return -1; + } + } while (state->x.have == 0 && (!state->eof || strm->avail_in)); + return 0; +} + +/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ +local int gz_skip(state, len) + gz_statep state; + z_off64_t len; +{ + unsigned n; + + /* skip over len bytes or reach end-of-file, whichever comes first */ + while (len) + /* skip over whatever is in output buffer */ + if (state->x.have) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? 
+ (unsigned)len : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + len -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) + break; + + /* need more data to skip -- load up output buffer */ + else { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return -1; + } + return 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzread(file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + unsigned got, n; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids the flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); + return -1; + } + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return -1; + } + + /* get len bytes to buf, or less than len if at the end */ + got = 0; + do { + /* first just try copying data from the output buffer */ + if (state->x.have) { + n = state->x.have > len ? len : state->x.have; + memcpy(buf, state->x.next, n); + state->x.next += n; + state->x.have -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && strm->avail_in == 0) { + state->past = 1; /* tried to read past end */ + break; + } + + /* need output data -- for small len or new stream load up our output + buffer */ + else if (state->how == LOOK || len < (state->size << 1)) { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return -1; + continue; /* no progress yet -- go back to copy above */ + /* the copy above assures that we will leave with space in the + output buffer, allowing at least one gzungetc() to succeed */ + } + + /* large len -- read directly into user buffer */ + else if (state->how == COPY) { /* read directly */ + if (gz_load(state, (unsigned char *)buf, len, &n) == -1) + return -1; + } + + /* large len -- decompress directly into user buffer */ + else { /* state->how == GZIP */ + strm->avail_out = len; + strm->next_out = (unsigned char *)buf; + if (gz_decomp(state) == -1) + return -1; + n = state->x.have; + state->x.have = 0; + } + + /* update progress */ + len -= n; + buf = (char *)buf + n; + got += n; + state->x.pos += n; + } while (len); + + /* return number of bytes read into user buffer (will fit in int) */ + return (int)got; +} + +/* -- see zlib.h -- */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +#else +# undef gzgetc +#endif +int ZEXPORT gzgetc(file) + gzFile file; +{ + int ret; + unsigned char buf[1]; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* try output buffer (no need to check for skip request) */ + if (state->x.have) { + state->x.have--; + state->x.pos++; + return *(state->x.next)++; + } + + /* nothing there -- try gzread() */ + 
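/* a return of less than 1 from gzread() below means end of file or error */ +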
ret = gzread(file, buf, 1); + return ret < 1 ? -1 : buf[0]; +} + +int ZEXPORT gzgetc_(file) +gzFile file; +{ + return gzgetc(file); +} + +/* -- see zlib.h -- */ +int ZEXPORT gzungetc(c, file) + int c; + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return -1; + } + + /* can't push EOF */ + if (c < 0) + return -1; + + /* if output buffer empty, put byte at end (allows more pushing) */ + if (state->x.have == 0) { + state->x.have = 1; + state->x.next = state->out + (state->size << 1) - 1; + state->x.next[0] = c; + state->x.pos--; + state->past = 0; + return c; + } + + /* if no room, give up (must have already done a gzungetc()) */ + if (state->x.have == (state->size << 1)) { + gz_error(state, Z_DATA_ERROR, "out of room to push characters"); + return -1; + } + + /* slide output data if needed and insert byte before existing data */ + if (state->x.next == state->out) { + unsigned char *src = state->out + state->x.have; + unsigned char *dest = state->out + (state->size << 1); + while (src > state->out) + *--dest = *--src; + state->x.next = dest; + } + state->x.have++; + state->x.next--; + state->x.next[0] = c; + state->x.pos--; + state->past = 0; + return c; +} + +/* -- see zlib.h -- */ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + unsigned left, n; + char *str; + unsigned char *eol; + gz_statep state; + + /* check parameters and get internal structure */ + if (file == NULL || buf == NULL || len < 1) + return NULL; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return NULL; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return NULL; + } + + /* copy output bytes up to new line or len - 1, whichever comes first -- + append a terminating zero to the string (we don't check for a zero in + the contents, let the user worry about that) */ + str = buf; + left = (unsigned)len - 1; + if (left) do { + /* assure that something is in the output buffer */ + if (state->x.have == 0 && gz_fetch(state) == -1) + return NULL; /* error */ + if (state->x.have == 0) { /* end of file */ + state->past = 1; /* read past end */ + break; /* return what we have */ + } + + /* look for end-of-line in current output buffer */ + n = state->x.have > left ? 
left : state->x.have; + eol = (unsigned char *)memchr(state->x.next, '\n', n); + if (eol != NULL) + n = (unsigned)(eol - state->x.next) + 1; + + /* copy through end-of-line, or remainder if not found */ + memcpy(buf, state->x.next, n); + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + left -= n; + buf += n; + } while (left && eol == NULL); + + /* return terminated string, or if nothing, end of file */ + if (buf == str) + return NULL; + buf[0] = 0; + return str; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzdirect(file) + gzFile file; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* if the state is not known, but we can find out, then do so (this is + mainly for right after a gzopen() or gzdopen()) */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* return 1 if transparent, 0 if processing a gzip stream */ + return state->direct; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_r(file) + gzFile file; +{ + int ret, err; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're reading */ + if (state->mode != GZ_READ) + return Z_STREAM_ERROR; + + /* free memory and close file */ + if (state->size) { + inflateEnd(&(state->strm)); + free(state->out); + free(state->in); + } + err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; + gz_error(state, Z_OK, NULL); + free(state->path); + ret = close(state->fd); + free(state); + return ret ? Z_ERRNO : err; +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/gzwrite.c b/c-blosc/internal-complibs/zlib-1.2.8/gzwrite.c new file mode 100644 index 0000000..aa767fb --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/gzwrite.c @@ -0,0 +1,577 @@ +/* gzwrite.c -- zlib functions for writing gzip files + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Local functions */ +local int gz_init OF((gz_statep)); +local int gz_comp OF((gz_statep, int)); +local int gz_zero OF((gz_statep, z_off64_t)); + +/* Initialize state for writing a gzip file. Mark initialization by setting + state->size to non-zero. Return -1 on failure or 0 on success. 
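+ (When compressing, the deflateInit2() call below passes a windowBits of
+ MAX_WBITS + 16; the extra 16 requests a gzip header and trailer around
+ the deflate data instead of a zlib wrapper.)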
*/ +local int gz_init(state) + gz_statep state; +{ + int ret; + z_streamp strm = &(state->strm); + + /* allocate input buffer */ + state->in = (unsigned char *)malloc(state->want); + if (state->in == NULL) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* only need output buffer and deflate state if compressing */ + if (!state->direct) { + /* allocate output buffer */ + state->out = (unsigned char *)malloc(state->want); + if (state->out == NULL) { + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* allocate deflate memory, set up for gzip compression */ + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = deflateInit2(strm, state->level, Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy); + if (ret != Z_OK) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + } + + /* mark state as initialized */ + state->size = state->want; + + /* initialize write buffer if compressing */ + if (!state->direct) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = strm->next_out; + } + return 0; +} + +/* Compress whatever is at avail_in and next_in and write to the output file. + Return -1 if there is an error writing to the output file, otherwise 0. + flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH, + then the deflate() state is reset to start a new gzip stream. If gz->direct + is true, then simply write to the output file without compressing, and + ignore flush. */ +local int gz_comp(state, flush) + gz_statep state; + int flush; +{ + int ret, got; + unsigned have; + z_streamp strm = &(state->strm); + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return -1; + + /* write directly if requested */ + if (state->direct) { + got = write(state->fd, strm->next_in, strm->avail_in); + if (got < 0 || (unsigned)got != strm->avail_in) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + strm->avail_in = 0; + return 0; + } + + /* run deflate() on provided input until it produces no more output */ + ret = Z_OK; + do { + /* write out current buffer contents if full, or if flushing, but if + doing Z_FINISH then don't write until we get to Z_STREAM_END */ + if (strm->avail_out == 0 || (flush != Z_NO_FLUSH && + (flush != Z_FINISH || ret == Z_STREAM_END))) { + have = (unsigned)(strm->next_out - state->x.next); + if (have && ((got = write(state->fd, state->x.next, have)) < 0 || + (unsigned)got != have)) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + if (strm->avail_out == 0) { + strm->avail_out = state->size; + strm->next_out = state->out; + } + state->x.next = strm->next_out; + } + + /* compress */ + have = strm->avail_out; + ret = deflate(strm, flush); + if (ret == Z_STREAM_ERROR) { + gz_error(state, Z_STREAM_ERROR, + "internal error: deflate stream corrupt"); + return -1; + } + have -= strm->avail_out; + } while (have); + + /* if that completed a deflate stream, allow another to start */ + if (flush == Z_FINISH) + deflateReset(strm); + + /* all done, no errors */ + return 0; +} + +/* Compress len zeros to output. Return -1 on error, 0 on success. 
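+ (This is what services a gzseek() past the current end in write mode:
+ the skipped-over gap is filled with that many compressed zero bytes.)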
*/ +local int gz_zero(state, len) + gz_statep state; + z_off64_t len; +{ + int first; + unsigned n; + z_streamp strm = &(state->strm); + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + + /* compress len zeros (len guaranteed > 0) */ + first = 1; + while (len) { + n = GT_OFF(state->size) || (z_off64_t)state->size > len ? + (unsigned)len : state->size; + if (first) { + memset(state->in, 0, n); + first = 0; + } + strm->avail_in = n; + strm->next_in = state->in; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + len -= n; + } + return 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzwrite(file, buf, len) + gzFile file; + voidpc buf; + unsigned len; +{ + unsigned put = len; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids the flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); + return 0; + } + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* for small len, copy to input buffer, otherwise compress directly */ + if (len < state->size) { + /* copy to input buffer, compress when full */ + do { + unsigned have, copy; + + if (strm->avail_in == 0) + strm->next_in = state->in; + have = (unsigned)((strm->next_in + strm->avail_in) - state->in); + copy = state->size - have; + if (copy > len) + copy = len; + memcpy(state->in + have, buf, copy); + strm->avail_in += copy; + state->x.pos += copy; + buf = (const char *)buf + copy; + len -= copy; + if (len && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + } while (len); + } + else { + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + /* directly compress user buffer to file */ + strm->avail_in = len; + strm->next_in = (z_const Bytef *)buf; + state->x.pos += len; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + } + + /* input was all buffered or compressed (put will fit in int) */ + return (int)put; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned have; + unsigned char buf[1]; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return -1; + } + + /* try writing to input buffer for speed (state->size == 0 if buffer not + initialized) */ + if (state->size) { + if (strm->avail_in == 0) + strm->next_in = state->in; + have = (unsigned)((strm->next_in + strm->avail_in) - state->in); + if (have < state->size) { + state->in[have] = c; + strm->avail_in++; + state->x.pos++; + return c & 0xff; + } + } + + /* no room in buffer or 
not initialized, use gzwrite() */ + buf[0] = c; + if (gzwrite(file, buf, 1) != 1) + return -1; + return c & 0xff; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputs(file, str) + gzFile file; + const char *str; +{ + int ret; + unsigned len; + + /* write string */ + len = (unsigned)strlen(str); + ret = gzwrite(file, str, len); + return ret == 0 && len != 0 ? -1 : ret; +} + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +#include <stdarg.h> + +/* -- see zlib.h -- */ +int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) +{ + int size, len; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + /* do the printf() into the input buffer, put length in len */ + size = (int)(state->size); + state->in[size - 1] = 0; +#ifdef NO_vsnprintf +# ifdef HAS_vsprintf_void + (void)vsprintf((char *)(state->in), format, va); + for (len = 0; len < size; len++) + if (state->in[len] == 0) break; +# else + len = vsprintf((char *)(state->in), format, va); +# endif +#else +# ifdef HAS_vsnprintf_void + (void)vsnprintf((char *)(state->in), size, format, va); + len = strlen((char *)(state->in)); +# else + len = vsnprintf((char *)(state->in), size, format, va); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len <= 0 || len >= (int)size || state->in[size - 1] != 0) + return 0; + + /* update buffer and position, defer compression until needed */ + strm->avail_in = (unsigned)len; + strm->next_in = state->in; + state->x.pos += len; + return len; +} + +int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
+{ + va_list va; + int ret; + + va_start(va, format); + ret = gzvprintf(file, format, va); + va_end(va); + return ret; +} + +#else /* !STDC && !Z_HAVE_STDARG_H */ + +/* -- see zlib.h -- */ +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + int size, len; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that can really pass pointer in ints */ + if (sizeof(int) != sizeof(void *)) + return 0; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + /* do the printf() into the input buffer, put length in len */ + size = (int)(state->size); + state->in[size - 1] = 0; +#ifdef NO_snprintf +# ifdef HAS_sprintf_void + sprintf((char *)(state->in), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + for (len = 0; len < size; len++) + if (state->in[len] == 0) break; +# else + len = sprintf((char *)(state->in), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#else +# ifdef HAS_snprintf_void + snprintf((char *)(state->in), size, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + len = strlen((char *)(state->in)); +# else + len = snprintf((char *)(state->in), size, format, a1, a2, a3, a4, a5, a6, + a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, + a19, a20); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len <= 0 || len >= (int)size || state->in[size - 1] != 0) + return 0; + + /* update buffer and position, defer compression until needed */ + strm->avail_in = (unsigned)len; + strm->next_in = state->in; + state->x.pos += len; + return len; +} + +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzflush(file, flush) + gzFile file; + int flush; +{ + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* check flush parameter */ + if (flush < 0 || flush > Z_FINISH) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return -1; + } + + /* compress remaining data with requested flush */ + gz_comp(state, flush); + return state->err; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzsetparams(file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* if no 
change is requested, then do nothing */ + if (level == state->level && strategy == state->strategy) + return Z_OK; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return -1; + } + + /* change compression parameters for subsequent input */ + if (state->size) { + /* flush previous input with previous parameters before changing */ + if (strm->avail_in && gz_comp(state, Z_PARTIAL_FLUSH) == -1) + return state->err; + deflateParams(strm, level, strategy); + } + state->level = level; + state->strategy = strategy; + return Z_OK; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_w(file) + gzFile file; +{ + int ret = Z_OK; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing */ + if (state->mode != GZ_WRITE) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + ret = state->err; + } + + /* flush, free memory, and close file */ + if (gz_comp(state, Z_FINISH) == -1) + ret = state->err; + if (state->size) { + if (!state->direct) { + (void)deflateEnd(&(state->strm)); + free(state->out); + } + free(state->in); + } + gz_error(state, Z_OK, NULL); + free(state->path); + if (close(state->fd) == -1) + ret = Z_ERRNO; + free(state); + return ret; +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/infback.c b/c-blosc/internal-complibs/zlib-1.2.8/infback.c new file mode 100644 index 0000000..f3833c2 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/infback.c @@ -0,0 +1,640 @@ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2011 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. Normally either infback.o or + inflate.o would be linked into an application--not both. The interface + with inffast.c is retained so that optimized assembler-coded versions of + inflate_fast() can be used with either inflate.c or infback.c. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. 
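+
+   As an illustrative sketch only (names here are arbitrary), a typical
+   initialization with a full 32K window, using the inflateBackInit()
+   macro from zlib.h to supply the version and stream-size arguments,
+   might read:
+
+     static unsigned char window[32768];
+     z_stream strm;
+     strm.zalloc = Z_NULL;
+     strm.zfree = Z_NULL;
+     strm.opaque = Z_NULL;
+     int ret = inflateBackInit(&strm, 15, window);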
+ */ +int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) +z_streamp strm; +int windowBits; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->wnext = 0; + state->whave = 0; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* Macros for inflateBack(): */ + +/* Load returned state from inflate_fast() */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Set state from registers for inflate_fast() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. 
If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). */ +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflateBack() + with an error if there is no input available. */ +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflateBack() with + an error. */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Assure that some output space is available, by writing out the window + if it's full. If the write fails, return from inflateBack() with a + Z_BUF_ERROR. */ +#define ROOM() \ + do { \ + if (left == 0) { \ + put = state->window; \ + left = state->wsize; \ + state->whave = left; \ + if (out(out_desc, put, left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, then inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized.
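+
+   As an illustrative sketch only (assuming stdio and arbitrary names), a
+   minimal pair of call-backs that read from and write to FILE pointers
+   passed as the descriptors could look like:
+
+     static unsigned in_cb(void *desc, z_const unsigned char **buf)
+     {
+         static unsigned char hold[16384];
+         *buf = hold;                /* hand zlib our input buffer */
+         return (unsigned)fread(hold, 1, sizeof(hold), (FILE *)desc);
+     }
+
+     static int out_cb(void *desc, unsigned char *buf, unsigned len)
+     {
+         return fwrite(buf, 1, len, (FILE *)desc) != len;
+     }
+
+   after which inflateBack(&strm, in_cb, infile, out_cb, outfile) processes
+   the whole stream in a single call.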
+ */ +int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) +z_streamp strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; + + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = 
state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= 6 && left >= 258) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + state->length = (unsigned)here.val; + + /* process literal */ + if (here.op == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? 
+ left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left)) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBackEnd(strm) +z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/inffast.c b/c-blosc/internal-complibs/zlib-1.2.8/inffast.c new file mode 100644 index 0000000..bda59ce --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/inffast.c @@ -0,0 +1,340 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2008, 2010, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifndef ASMINF + +/* Allow machine dependent optimization for post-increment or pre-increment. + Based on testing to date, + Pre-increment preferred for: + - PowerPC G3 (Adler) + - MIPS R5000 (Randers-Pehrson) + Post-increment preferred for: + - none + No measurable difference: + - Pentium III (Anderson) + - M68060 (Nikl) + */ +#ifdef POSTINC +# define OFF 0 +# define PUP(a) *(a)++ +#else +# define OFF 1 +# define PUP(a) *++(a) +#endif + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. 
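(258 is the DEFLATE limit: the three-byte minimum match length plus at most 255 extra bytes encoded by the length codes.)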
inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ +void ZLIB_INTERNAL inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in - OFF; + last = in + (strm->avail_in - 5); + out = strm->next_out - OFF; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + wnext = state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + here = lcode[hold & lmask]; + dolen: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op == 0) { /* literal */ + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 
+ "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + PUP(out) = (unsigned char)(here.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + here = dcode[hold & dmask]; + dodist: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + PUP(out) = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + PUP(out) = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + PUP(out) = PUP(from); + } while (--len); + continue; + } +#endif + } + from = window - OFF; + if (wnext == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; + if (op < len) { /* some from end of window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = window - OFF; + if (wnext < len) { /* some from start of window */ + op = wnext; + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += wnext - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); + len -= 3; + } + if (len) { + PUP(out) = PUP(from); + if (len > 1) + PUP(out) = PUP(from); + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); + len -= 3; + } while (len > 2); + if (len) { + PUP(out) = PUP(from); + if (len > 1) + PUP(out) = PUP(from); + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode[here.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode[here.val + (hold & ((1U << op) - 1))]; + goto 
dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in + OFF; + strm->next_out = out + OFF; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and wnext == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff --git a/c-blosc/internal-complibs/zlib-1.2.8/inffast.h b/c-blosc/internal-complibs/zlib-1.2.8/inffast.h new file mode 100644 index 0000000..e5c1aa4 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/inffast.h @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/c-blosc/internal-complibs/zlib-1.2.8/inffixed.h b/c-blosc/internal-complibs/zlib-1.2.8/inffixed.h new file mode 100644 index 0000000..d628327 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/inffixed.h @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. 
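+ (Each entry is a code structure from inftrees.h of the form
+ {op, bits, val}: an operation/extra-bits indicator, the number of code
+ bits consumed by the entry, and a literal value, base length or
+ distance, or sub-table offset.)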
+ */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + 
{0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/c-blosc/internal-complibs/zlib-1.2.8/inflate.c b/c-blosc/internal-complibs/zlib-1.2.8/inflate.c new file mode 100644 index 0000000..870f89b --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/inflate.c @@ -0,0 +1,1512 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2012 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits 
assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common wnext == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. + * - Fixed stored blocks bug in inflateBack() + * + * 1.2.beta6 4 Jan 2003 + * - Added comments in inffast.c on effectiveness of POSTINC + * - Typecasting all around to reduce compiler warnings + * - Changed loops from while (1) or do {} while (1) to for (;;), again to + * make compilers happy + * - Changed type of window in inflateBackInit() to unsigned char * + * + * 1.2.beta7 27 Jan 2003 + * - Changed many types to unsigned or unsigned short to avoid warnings + * - Added inflateCopy() function + * + * 1.2.0 9 Mar 2003 + * - Changed inflateBack() interface to provide separate opaque descriptors + * for the in() and out() functions + * - Changed inflateBack() argument and in_func typedef to swap the length + * and buffer address return values for the input function + * - Check next_in and next_out for Z_NULL on entry to inflate() + * + * The history for versions after 1.2.0 is in ChangeLog in zlib distribution.
+ */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef MAKEFIXED +# ifndef BUILDFIXED +# define BUILDFIXED +# endif +#endif + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, + unsigned copy)); +#ifdef BUILDFIXED + void makefixed OF((void)); +#endif +local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, + unsigned len)); + +int ZEXPORT inflateResetKeep(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->dmax = 32768U; + state->head = Z_NULL; + state->hold = 0; + state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 1; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) +z_streamp strm; +int windowBits; +const char *version; +int stream_size; +{ + int ret; + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->window = Z_NULL; + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { + ZFREE(strm, state); + strm->state = Z_NULL; + } + return ret; +} + +int ZEXPORT 
inflateInit_(strm, version, stream_size) +z_streamp strm; +const char *version; +int stream_size; +{ + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += value << state->bits; + state->bits += bits; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include <stdio.h> + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", (low & 127) == 99 ?
64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(strm, end, copy) +z_streamp strm; +const Bytef *end; +unsigned copy; +{ + struct inflate_state FAR *state; + unsigned dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->wnext; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->wnext, end - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; + state->whave = state->wsize; + } + else { + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE(check, buf, len) \ + (state->flags ? 
crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. 
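+
+   As a concrete illustration of that idiom (this simply mirrors the TYPEDO
+   state further below, so it adds no new machinery), reading the three-bit
+   deflate block header is:
+
+      NEEDBITS(3);
+      state->last = BITS(1);         /* low bit is the last-block flag */
+      DROPBITS(1);
+      switch (BITS(2)) {             /* next two bits select the block type */
+      ...
+      }
+      DROPBITS(2);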
+ + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream is available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream.
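+
+   Putting this together, a typical calling loop looks like the following
+   sketch (the buffer names and sizes are illustrative only, not part of
+   this library; see zlib.h and examples/zpipe.c in the zlib distribution
+   for a complete treatment):
+
+      strm.next_in = source; strm.avail_in = source_len;
+      do {
+          strm.next_out = dest; strm.avail_out = dest_size;
+          ret = inflate(&strm, Z_NO_FLUSH);
+          ... consume the dest_size - strm.avail_out bytes produced ...
+      } while (ret == Z_OK);
+
+   Here Z_STREAM_END signals a completed stream, Z_BUF_ERROR signals that no
+   progress was possible (supply more input or output space and retry), and
+   the remaining codes are errors.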
+ */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + else if (len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? 
DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if (state->flags & 0x0200) CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL) { + len = state->head->extra_len - state->length; + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if (hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + 
} + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + case COPY_: + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 
0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + case LEN_: + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 
+ "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + state->mode = LIT; + break; + } + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if (out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if (( +#ifdef GUNZIP + state->flags ? 
hold : +#endif + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if (state->wrap && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) +z_streamp strm; +Bytef *dictionary; +uInt *dictLength; +{ + struct inflate_state FAR *state; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long dictid; + int ret; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary identifier */ + if (state->mode == DICT) { + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + 
ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +const unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. 
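+
+   For example, such an implementation could verify the condition after
+   inflating each received packet (an illustrative sketch only):
+
+      if (!inflateSyncPoint(&strm))
+          ... reject the packet as malformed ...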
+ */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL || + source->zalloc == (alloc_func)0 || source->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} + +int ZEXPORT inflateUndermine(strm, subvert) +z_streamp strm; +int subvert; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->sane = !subvert; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + return Z_OK; +#else + state->sane = 1; + return Z_DATA_ERROR; +#endif +} + +long ZEXPORT inflateMark(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return -1L << 16; + state = (struct inflate_state FAR *)strm->state; + return ((long)(state->back) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/inflate.h b/c-blosc/internal-complibs/zlib-1.2.8/inflate.h new file mode 100644 index 0000000..95f4986 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/inflate.h @@ -0,0 +1,122 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2009 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. 
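+
+   For example, a build along the lines of
+
+      cc -DNO_GZIP -c inflate.c
+
+   leaves GUNZIP undefined below, so the gzip-specific code paths in
+   inflate.c, and the crc32() calls they make, compile away.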
*/ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* state maintained between inflate() calls. Approximately 10K bytes. 
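+   Most of that is the code tables: codes[ENOUGH] holds 852 + 592 = 1444
+   entries of four bytes each, about 5.6K, and lens[320] plus work[288]
+   contribute roughly another 1.2K of two-byte shorts; see the ENOUGH
+   constants in inftrees.h.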
*/ +struct inflate_state { + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ +}; diff --git a/c-blosc/internal-complibs/zlib-1.2.8/inftrees.c b/c-blosc/internal-complibs/zlib-1.2.8/inftrees.c new file mode 100644 index 0000000..44d89cf --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/inftrees.c @@ -0,0 +1,306 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.8 Copyright 1995-2013 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. 
bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + int end; /* use base and extra for symbol > end */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. 
+ + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. 
This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. + */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + end = 19; + break; + case LENS: + base = lbase; + base -= 257; + extra = lext; + extra -= 257; + end = 256; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + end = -1; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if ((int)(work[sym]) < end) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if ((int)(work[sym]) > end) { + here.op = (unsigned char)(extra[work[sym]]); + here.val = base[work[sym]]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/inftrees.h b/c-blosc/internal-complibs/zlib-1.2.8/inftrees.h new file mode 100644 index 0000000..baa53a0 --- /dev/null +++ 
b/c-blosc/internal-complibs/zlib-1.2.8/inftrees.h @@ -0,0 +1,62 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op are the number of index bits of + that table. For a length or distance, the low four bits of op + are the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns 852, and "enough 30 6 15" for distance codes returns 592. + The initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); diff --git a/c-blosc/internal-complibs/zlib-1.2.8/trees.c b/c-blosc/internal-complibs/zlib-1.2.8/trees.c new file mode 100644 index 0000000..1fd7759 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/trees.c @@ -0,0 +1,1226 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2012 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences.
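+ * + * (Added illustrative note, not from the upstream source: in typical + * ASCII text a frequent literal such as 'e' usually receives a code of + * only 6 or 7 bits, while rare bytes may need 10 bits or more.)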
+ * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + */ + +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef DEBUG +# include <ctype.h> +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However, + * the codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances.
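+ * (Added illustrative note: the d_code() macro in deflate.h indexes + * this table; e.g. a match distance of 5 gives dist-1 == 4 and + * _dist_code[4] == 4, i.e. distance code 4 with one extra bit.)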
+ */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. + */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); +local void copy_block OF((deflate_state *s, charf *buf, unsigned len, + int header)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. 
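+ * (Added illustrative note: with Buf_size == 16, bi_valid == 10 and + * length == 9, the low 6 bits of value complete bi_buf, that short is + * flushed with put_short(), and the remaining 3 bits stay behind with + * bi_valid == 3.)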
+ */ +#ifdef DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. + */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1<<extra_lbits[code]); n++) { + _length_code[length++] = (uch)code; + } + } + Assert (length == 256, "tr_static_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + _length_code[length-1] = (uch)code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<<extra_dbits[code]); n++) { + _dist_code[dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H +
 gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Generate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef DEBUG +# include <stdio.h> +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void ZLIB_INTERNAL _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len.
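+ * (Added note: this is the classic binary-heap pop: the last element + * replaces the root and pqdownheap() restores the heap invariant.)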
+ */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares two subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. + */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree).
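+ * (Added illustrative note: e.g. with max_length == 7, a leaf that + * lands at depth 8 is clamped to 7 bits and counted in overflow; the + * repair loop further down then moves other leaves deeper until the + * code is complete again.)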
+ */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (bits + xbits); + if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((long)bits - (long)tree[m].Len) + *(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, + "inconsistent bit counts"); + Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + if (len == 0) continue; + /* Now reverse the bits */ + tree[n].Code = bi_reverse(next_code[len]++, len); + + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); + } +} + +/* =========================================================================== + * Construct one Huffman tree and assign the code bit strings and lengths. + * Update the total bit length for the current block. + * IN assertion: the field freq is set for all tree elements. + * OUT assertions: the fields len and code are set to the optimal bit length + * and corresponding code. The length opt_len is updated; static_len is + * also updated if stree is not null. The field max_code is set. + */ +local void build_tree(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used.
+ */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? + s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. 
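+ * (Added illustrative note: a run of 30 zero lengths is counted as a + * single REPZ_11_138 event, while the length sequence 5,5,5,5,5 counts + * one '5' plus one REP_3_6 covering the remaining four repeats.)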
+ */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. 
+ */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit length codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ +#ifdef DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; +#endif + copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer.
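+ * (Added note: 3 bits of block type plus the 7-bit all-zero static + * end-of-block code account for the 10 bits.)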
+ */ +void ZLIB_INTERNAL _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. + */ +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, last); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+last, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. 
+ */ + init_block(s); + + if (last) { + bi_windup(s); +#ifdef DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*last)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int ZLIB_INTERNAL _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? 
*/ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); +} + +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "black list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. + */ +local int detect_data_type(s) + deflate_state *s; +{ + /* black_mask is the bit mask of black-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long black_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("black-listed") bytes. */ + for (n = 0; n <= 31; n++, black_mask >>= 1) + if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("white-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "black-listed" or "white-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. 
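+ * (Added illustrative note: for len == 5 the four header bytes are + * 05 00 fa ff, i.e. LEN then NLEN, least-significant byte first.)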
+ */ +local void copy_block(s, buf, len, header) + deflate_state *s; + charf *buf; /* the input data */ + unsigned len; /* its length */ + int header; /* true if block header must be written */ +{ + bi_windup(s); /* align on byte boundary */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG + s->bits_sent += (ulg)len<<3; +#endif + while (len--) { + put_byte(s, *buf++); + } +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/trees.h b/c-blosc/internal-complibs/zlib-1.2.8/trees.h new file mode 100644 index 0000000..d35639d --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/trees.h @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, 
{{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 
28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff --git a/c-blosc/internal-complibs/zlib-1.2.8/uncompr.c b/c-blosc/internal-complibs/zlib-1.2.8/uncompr.c new file mode 100644 index 0000000..242e949 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/uncompr.c @@ -0,0 +1,59 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-2003, 2010 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the uncompressed data. + + uncompress returns Z_OK on success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted.
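+ + An illustrative call sequence (added note, not from the upstream + source), assuming the caller recorded the original size out of band: + + uLongf dlen = original_size; /* expected uncompressed size */ + int rc = uncompress(dest, &dlen, src, src_len); + /* rc == Z_OK on success; dlen then holds the uncompressed size */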
+*/ +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + z_stream stream; + int err; + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = (uInt)sourceLen; + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; + + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + err = inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + inflateEnd(&stream); + if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0)) + return Z_DATA_ERROR; + return err; + } + *destLen = stream.total_out; + + err = inflateEnd(&stream); + return err; +} diff --git a/c-blosc/internal-complibs/zlib-1.2.8/zconf.h b/c-blosc/internal-complibs/zlib-1.2.8/zconf.h new file mode 100644 index 0000000..9987a77 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/zconf.h @@ -0,0 +1,511 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2013 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". + */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzgetc z_gzgetc +# define 
gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzvprintf z_gzvprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetHeader z_inflateGetHeader +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateSetDictionary z_inflateSetDictionary +# define inflateGetDictionary z_inflateGetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateResetKeep z_inflateResetKeep +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). 
+ */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +/* Some Mac compilers merge all .h files incorrectly: */ +#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) +# define NO_DUMMY_DECL +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. 
+ */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include <limits.h> +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include <sys/types.h> /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include <stdarg.h> /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include <stddef.h> /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64
+#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. */ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/c-blosc/internal-complibs/zlib-1.2.8/zlib.h b/c-blosc/internal-complibs/zlib-1.2.8/zlib.h new file mode 100644 index 0000000..3e0c767 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/zlib.h @@ -0,0 +1,1768 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.8, April 28th, 2013 + + Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.8" +#define ZLIB_VERNUM 0x1280 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 8 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. 
+ + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip streams in memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use in the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. 
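+ *
+ * For example (an illustrative sketch; handle_error is a hypothetical
+ * helper):
+ *
+ *     int ret = inflate(&strm, Z_NO_FLUSH);
+ *     if (ret < 0)                /* negative return values are errors */
+ *         handle_error(ret);
+ *     else if (ret == Z_STREAM_END)
+ *         done = 1;               /* a special but normal event */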
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field (though see inflate()) */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). Some + output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. 
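+
+   For example, a typical compression loop looks as follows (a minimal
+   sketch; fill_input() and use_output() are hypothetical helpers, the
+   "last" flag marks the final chunk of input, and error handling is
+   omitted):
+
+     unsigned char outbuf[16384];
+
+     strm.avail_in = fill_input(&strm.next_in);       /* supply input */
+     do {
+         strm.next_out = outbuf;
+         strm.avail_out = sizeof(outbuf);
+         deflate(&strm, last ? Z_FINISH : Z_NO_FLUSH);
+         use_output(outbuf, sizeof(outbuf) - strm.avail_out);
+     } while (strm.avail_out == 0);                   /* drain all output */
+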
The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed code + block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the stream + are deflateReset or deflateEnd. + + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. 
In this case, avail_out must be at least the + value returned by deflateBound (see below). Then deflate is guaranteed to + return Z_STREAM_END. If not enough output space is provided, deflate will + not return Z_STREAM_END, and it must be called again as described above. + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered + binary. This field is only for information purposes and does not affect the + compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + fatal, and deflate() can be called again with more input and more output + space to continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. If next_in is not Z_NULL and avail_in is large enough (the + exact value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to + use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit() does not process any header information -- that is deferred + until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. 
inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing will + resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. 
(The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained, so applications that need that information should + instead use raw inflate, see inflateInit2() below, or inflateBack() and + perform their own processing of the gzip header and trailer. When processing + gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is desired. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output.
+ + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute an adler32 check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to 255 (unknown). If a + gzip stream is being written, strm->adler is a crc32 instead of an adler32. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). 
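+
+     For example, to emit a gzip-wrapped stream with the default window and
+     memory settings (a minimal sketch; error handling omitted):
+
+       z_stream strm;
+       strm.zalloc = Z_NULL;
+       strm.zfree = Z_NULL;
+       strm.opaque = Z_NULL;
+       deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                    15 + 16,       /* 32K window plus a gzip wrapper */
+                    8, Z_DEFAULT_STRATEGY);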
+*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the adler32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The adler32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + adler32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. 
The + stream will keep the same compression level and any other attributes that + may have been set by deflateInit2. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2. This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression level is changed, the input available so far is + compressed with the old level (and may be flushed); the new level will take + effect only at the next call of deflate(). + + Before the call of deflateParams, the stream state must be set as for + a call of deflate(), since the currently available input may have to be + compressed and flushed. In particular, strm->avail_out must be non-zero. + + deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if + strm->avail_out was zero. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having been consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent.
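+
+     For example (an illustrative sketch):
+
+       unsigned pending;
+       int bits;
+       if (deflatePending(&strm, &pending, &bits) == Z_OK) {
+           /* pending whole bytes plus bits extra bits of output are
+              buffered internally, awaiting output space */
+       }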
+ */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. 
If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() apply to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + crc32 instead of an adler32. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the adler32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must ensure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect adler32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent.
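+
+     For example (an illustrative sketch):
+
+       Bytef dict[32768];          /* 32768 bytes is always enough */
+       uInt dictLen;
+       if (inflateGetDictionary(&strm, dict, &dictLen) == Z_OK) {
+           /* dictLen bytes of the current sliding window are in dict */
+       }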
+*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent.
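+
+     For example, to resume a raw inflate at a saved bit position, in the
+     style of zlib's zran.c example (a sketch; "last" is assumed to be the
+     byte holding the position, whose top "bits" bits have not yet been
+     consumed):
+
+       inflateReset2(&strm, -15);            /* raw inflate */
+       if (bits)
+           inflatePrime(&strm, bits, last >> (8 - bits));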
+*/ + +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above or -1 << 16 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. 
The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the normal + behavior of inflate(), which expects either a zlib or gzip header and + trailer around the deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. 
inflateBack() will call in(in_desc, &buf) which should return the
+   number of bytes of provided input, and a pointer to that input in buf. If
+   there is no input available, in() must return zero--buf is ignored in that
+   case--and inflateBack() will return a buffer error. inflateBack() will call
+   out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out()
+   should return zero on success, or non-zero on failure. If out() returns
+   non-zero, inflateBack() will return with an error. Neither in() nor out()
+   are permitted to change the contents of the window provided to
+   inflateBackInit(), which is also the buffer that out() uses to write from.
+   The length written by out() will be at most the window size. Any non-zero
+   amount of input may be provided by in().
+
+   For convenience, inflateBack() can be provided input on the first call by
+   setting strm->next_in and strm->avail_in. If that input is exhausted, then
+   in() will be called. Therefore strm->next_in must be initialized before
+   calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called
+   immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in
+   must also be initialized, and then if strm->avail_in is not zero, input will
+   initially be taken from strm->next_in[0 .. strm->avail_in - 1].
+
+   The in_desc and out_desc parameters of inflateBack() are passed as the
+   first parameter of in() and out() respectively when they are called. These
+   descriptors can be optionally used to pass any information that the caller-
+   supplied in() and out() functions need to do their job.
+
+   On return, inflateBack() will set strm->next_in and strm->avail_in to
+   pass back any unused input that was provided by the last in() call. The
+   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+   if in() or out() returned an error, Z_DATA_ERROR if there was a format error
+   in the deflate stream (in which case strm->msg is set to indicate the nature
+   of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
+   In the case of Z_BUF_ERROR, an input or output error can be distinguished
+   using strm->next_in which will be Z_NULL only if in() returned an error. If
+   strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
+   non-zero. (in() will always be called before out(), so strm->next_in is
+   assured to be defined if out() returns non-zero.) Note that inflateBack()
+   cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+/*
+   All memory allocated by inflateBackInit() is freed.
+
+   inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+   state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+/* Return flags indicating compile-time options.
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. 
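+
+   Usage sketch (illustrative only; source and sourceLen are assumed given,
+   and error handling is abbreviated):
+
+     uLongf destLen = compressBound(sourceLen);
+     Bytef *dest = (Bytef *)malloc(destLen);
+     int ret = (dest == Z_NULL) ? Z_MEM_ERROR
+                                : compress(dest, &destLen, source, sourceLen);
+     /* on Z_OK, destLen holds the actual compressed size */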
+*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed buffer. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". (See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. 
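+
+   Usage sketch (illustrative only):
+
+     gzFile gz = gzopen("data.gz", "wb9");   /* write, maximum compression */
+     if (gz == NULL)
+         perror("data.gz");   /* errno is set if the open itself failed */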
+*/ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); +/* + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Two buffers are allocated, either both of the specified size when + writing, or one of the specified size and the other twice that size when + reading. A larger buffer size of, for example, 64K or 128K bytes will + noticeably increase the speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. + + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. 
Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. +*/ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes written or 0 in case of + error. +*/ + +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or 0 in case of error. The number of + uncompressed bytes written is limited to 8191, or one less than the buffer + size given to gzbuffer(). The caller should assure that this limit is not + exceeded. If it is exceeded, then gzprintf() will return an error (0) with + nothing written. In this case, there may also be a buffer overflow with + unpredictable consequences, which is possible only if zlib was compiled with + the insecure functions sprintf() or vsprintf() because the secure snprintf() + or vsnprintf() functions were not available. This can be determined using + zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. 
If gzungetc is used immediately after gzopen or gzdopen, at least the
+   output buffer size of pushed characters is allowed. (See gzbuffer above.)
+   The pushed character will be discarded if the stream is repositioned with
+   gzseek() or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+/*
+   Flushes all pending output into the compressed file. The parameter flush
+   is as in the deflate() function. The return value is the zlib error number
+   (see function gzerror below). gzflush is only permitted when writing.
+
+   If the flush parameter is Z_FINISH, the remaining data is written and the
+   gzip stream is completed in the output. If gzwrite() is called again, a new
+   gzip stream will be started in the output. gzread() is able to read such
+   concatenated gzip streams.
+
+   gzflush should be called only when strictly necessary because it will
+   degrade compression if called too often.
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
+                                   z_off_t offset, int whence));
+
+   Sets the starting position for the next gzread or gzwrite on the given
+   compressed file. The offset represents a number of bytes in the
+   uncompressed data stream. The whence parameter is defined as in lseek(2);
+   the value SEEK_END is not supported.
+
+   If the file is opened for reading, this function is emulated but can be
+   extremely slow. If the file is opened for writing, only forward seeks are
+   supported; gzseek then compresses a sequence of zeroes up to the new
+   starting position.
+
+   gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error, in
+   particular if the file is opened for writing and the new starting position
+   would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT gzrewind OF((gzFile file));
+/*
+   Rewinds the given file. This function is supported only for reading.
+
+   gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
+
+   Returns the starting position for the next gzread or gzwrite on the given
+   compressed file. This position represents a number of bytes in the
+   uncompressed data stream, and is zero when starting, even if appending or
+   reading a gzip stream from the middle of a file using gzdopen().
+
+   gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
+
+   Returns the current offset in the file being read or written. This offset
+   includes the count of bytes that precede the gzip stream, for example when
+   appending or when using gzdopen() for reading. When reading, the offset
+   does not include as yet unused buffered input. This information can be used
+   for a progress indicator. On error, gzoffset() returns -1.
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+   Returns true (1) if the end-of-file indicator has been set while reading,
+   false (0) otherwise. Note that the end-of-file indicator is set only if the
+   read tried to go past the end of the input, but came up short. Therefore,
+   just like feof(), gzeof() may return false even if there is no more data to
+   read, in the event that the last read request was for the exact number of
+   bytes remaining in the input file. This will happen if the input file size
+   is an exact multiple of the buffer size.
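+
+   Usage sketch (illustrative only; n is the return value of the last
+   gzread() on the open gzFile gz, and done is an application flag):
+
+     if (n == 0 && gzeof(gz))
+         done = 1;   /* clean end of input rather than an error */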
+
+   If gzeof() returns true, then the read functions will return no more data,
+   unless the end-of-file indicator is reset by gzclearerr() and the input file
+   has grown since the previous end of file was detected.
+*/
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+/*
+   Returns true (1) if file is being copied directly while reading, or false
+   (0) if file is a gzip stream being decompressed.
+
+   If the input file is empty, gzdirect() will return true, since the input
+   does not contain a gzip stream.
+
+   If gzdirect() is used immediately after gzopen() or gzdopen() it will
+   cause buffers to be allocated to allow reading the file to determine if it
+   is a gzip file. Therefore if gzbuffer() is used, it should be called before
+   gzdirect().
+
+   When writing, gzdirect() returns true (1) if transparent writing was
+   requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note:
+   gzdirect() is not needed when writing. Transparent writing must be
+   explicitly requested, so the application already knows the answer. When
+   linking statically, using gzdirect() will include all of the zlib code for
+   gzip file reading and decompression, which may not be desired.)
+*/
+
+ZEXTERN int ZEXPORT gzclose OF((gzFile file));
+/*
+   Flushes all pending output if necessary, closes the compressed file and
+   deallocates the (de)compression state. Note that once file is closed, you
+   cannot call gzerror with file, since its structures have been deallocated.
+   gzclose must not be called more than once on the same file, just as free
+   must not be called more than once on the same allocation.
+
+   gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
+   file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
+   last read ended in the middle of a gzip stream, or Z_OK on success.
+*/
+
+ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+/*
+   Same as gzclose(), but gzclose_r() is only for use when reading, and
+   gzclose_w() is only for use when writing or appending. The advantage to
+   using these instead of gzclose() is that they avoid linking in zlib
+   compression or decompression code that is not used when only reading or only
+   writing respectively. If gzclose() is used, then both compression and
+   decompression code will be included in the application when linking to a
+   static zlib library.
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+   Returns the error message for the last error which occurred on the given
+   compressed file. errnum is set to the zlib error number. If an error
+   occurred in the file system and not in the compression library, errnum is
+   set to Z_ERRNO and the application may consult errno to get the exact error
+   code.
+
+   The application must not modify the returned string. Future calls to
+   this function may invalidate the previously returned string. If file is
+   closed, then the string previously returned by gzerror will no longer be
+   available.
+
+   gzerror() should be used to distinguish errors from end-of-file for those
+   functions above that do not distinguish those cases in their return values.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+/*
+   Clears the error and end-of-file flags for file. This is analogous to the
+   clearerr() function in stdio. This is useful for continuing to read a gzip
+   file that is being written concurrently.
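+
+   Usage sketch (illustrative only; poll a gzip log that another process is
+   still writing, where gz is an open gzFile and consume() is hypothetical):
+
+     int n = gzread(gz, buf, sizeof(buf));
+     if (n > 0)
+         consume(buf, n);
+     else if (n == 0 && gzeof(gz))
+         gzclearerr(gz);   /* retry later, once the writer has appended */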
+*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. 
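+
+   Usage sketch (illustrative only; crc1 and crc2 were computed separately
+   over seq1 and seq2):
+
+     uLong crc = crc32_combine(crc1, crc2, len2);
+     /* crc now equals the CRC-32 of seq1 followed by seq2 */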
+*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +#define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +#define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +#define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#else +# define gzgetc(g) \ + ((g)->have ? 
((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* hack for buggy compilers */ +#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; +#endif + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/c-blosc/internal-complibs/zlib-1.2.8/zutil.c b/c-blosc/internal-complibs/zlib-1.2.8/zutil.c new file mode 100644 index 0000000..23d2ebe --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.2.8/zutil.c @@ -0,0 +1,324 @@ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-2005, 2010, 2011, 2012 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" +#ifndef Z_SOLO +# include "gzguts.h" +#endif + +#ifndef NO_DUMMY_DECL +struct internal_state {int dummy;}; /* for buggy compilers */ +#endif + +z_const char * const z_errmsg[10] = { +"need dictionary", /* Z_NEED_DICT 2 */ +"stream end", /* Z_STREAM_END 1 */ +"", /* Z_OK 0 */ +"file error", /* Z_ERRNO (-1) */ +"stream error", /* Z_STREAM_ERROR (-2) */ +"data error", /* Z_DATA_ERROR (-3) */ +"insufficient memory", /* Z_MEM_ERROR (-4) */ +"buffer error", /* Z_BUF_ERROR (-5) */ +"incompatible version",/* Z_VERSION_ERROR (-6) */ +""}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +uLong ZEXPORT zlibCompileFlags() +{ + uLong flags; + + flags = 0; + switch ((int)(sizeof(uInt))) { + case 2: break; + case 4: flags += 1; break; + case 8: flags += 2; break; + default: flags += 3; + } + switch ((int)(sizeof(uLong))) { + case 2: break; + case 4: flags += 1 << 2; break; + case 8: flags += 2 << 2; break; + default: flags += 3 << 2; + } + switch ((int)(sizeof(voidpf))) { + case 2: break; + case 4: flags += 1 << 4; break; + case 8: flags += 2 << 4; break; + default: flags += 3 << 4; + } + switch ((int)(sizeof(z_off_t))) { + case 2: break; + case 4: flags += 1 << 6; break; + case 8: flags += 2 << 6; break; + default: flags += 3 << 6; + } +#ifdef DEBUG + flags += 1 << 8; +#endif +#if defined(ASMV) || defined(ASMINF) + flags += 1 << 9; +#endif +#ifdef ZLIB_WINAPI + flags += 1 << 10; +#endif +#ifdef BUILDFIXED + flags += 1 << 12; +#endif +#ifdef DYNAMIC_CRC_TABLE + flags += 1 << 13; +#endif +#ifdef NO_GZCOMPRESS + flags += 1L << 16; +#endif +#ifdef NO_GZIP + flags += 1L << 17; +#endif +#ifdef PKZIP_BUG_WORKAROUND + flags += 1L << 20; +#endif +#ifdef FASTEST + flags += 1L << 21; +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifdef NO_vsnprintf + flags += 1L << 25; +# ifdef HAS_vsprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_vsnprintf_void + flags += 1L << 26; +# endif +# endif +#else + flags += 1L << 24; +# ifdef NO_snprintf + flags += 1L << 25; +# ifdef HAS_sprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_snprintf_void + flags += 1L << 26; +# endif +# endif +#endif + return flags; +} + +#ifdef DEBUG + +# ifndef verbose +# define verbose 0 +# endif +int ZLIB_INTERNAL z_verbose = verbose; + +void ZLIB_INTERNAL z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + +#if defined(_WIN32_WCE) + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. + */ + int errno = 0; +#endif + +#ifndef HAVE_MEMCPY + +void ZLIB_INTERNAL zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? to be unrolled */ + } while (--len != 0); +} + +int ZLIB_INTERNAL zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void ZLIB_INTERNAL zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? 
to be unrolled */ + } while (--len != 0); +} +#endif + +#ifndef Z_SOLO + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf = opaque; /* just to make some compilers happy */ + ulg bsize = (ulg)items*size; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. + */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + int n; + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + ptr = opaque; /* just to make some compilers happy */ + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + return _halloc((long)items, size); +} + +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + if (opaque) items += size - size; /* make compiler happy */ + return sizeof(uInt) > 2 ? 
(voidpf)malloc(items * size) :
+                      (voidpf)calloc(items, size);
+}
+
+void ZLIB_INTERNAL zcfree (opaque, ptr)
+    voidpf opaque;
+    voidpf ptr;
+{
+    free(ptr);
+    if (opaque) return; /* make compiler happy */
+}
+
+#endif /* MY_ZCALLOC */
+
+#endif /* !Z_SOLO */
diff --git a/c-blosc/internal-complibs/zlib-1.2.8/zutil.h b/c-blosc/internal-complibs/zlib-1.2.8/zutil.h
new file mode 100644
index 0000000..24ab06b
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.2.8/zutil.h
@@ -0,0 +1,253 @@
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2013 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZUTIL_H
+#define ZUTIL_H
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include "zlib.h"
+
+#if defined(STDC) && !defined(Z_SOLO)
+#  if !(defined(_WIN32_WCE) && defined(_MSC_VER))
+#    include <stddef.h>
+#  endif
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+
+#ifdef Z_SOLO
+   typedef long ptrdiff_t;  /* guess -- will be caught if guess is wrong */
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+typedef unsigned char uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long ulg;
+
+extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+
+#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
+
+#define ERR_RETURN(strm,err) \
+  return (strm->msg = ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+        /* common constants */
+
+#ifndef DEF_WBITS
+#  define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression.
MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES    2
+/* The three kinds of block type */
+
+#define MIN_MATCH 3
+#define MAX_MATCH 258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+        /* target dependencies */
+
+#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
+#  define OS_CODE  0x00
+#  ifndef Z_SOLO
+#    if defined(__TURBOC__) || defined(__BORLANDC__)
+#      if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+         /* Allow compilation with ANSI keywords only enabled */
+         void _Cdecl farfree( void *block );
+         void *_Cdecl farmalloc( unsigned long nbytes );
+#      else
+#        include <alloc.h>
+#      endif
+#    else /* MSC or DJGPP */
+#      include <malloc.h>
+#    endif
+#  endif
+#endif
+
+#ifdef AMIGA
+#  define OS_CODE  0x01
+#endif
+
+#if defined(VAXC) || defined(VMS)
+#  define OS_CODE  0x02
+#  define F_OPEN(name, mode) \
+     fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif
+
+#if defined(ATARI) || defined(atarist)
+#  define OS_CODE  0x05
+#endif
+
+#ifdef OS2
+#  define OS_CODE  0x06
+#  if defined(M_I86) && !defined(Z_SOLO)
+#    include <malloc.h>
+#  endif
+#endif
+
+#if defined(MACOS) || defined(TARGET_OS_MAC)
+#  define OS_CODE  0x07
+#  ifndef Z_SOLO
+#    if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
+#      include <unix.h> /* for fdopen */
+#    else
+#      ifndef fdopen
+#        define fdopen(fd,mode) NULL /* No fdopen() */
+#      endif
+#    endif
+#  endif
+#endif
+
+#ifdef TOPS20
+#  define OS_CODE  0x0a
+#endif
+
+#ifdef WIN32
+#  ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */
+#    define OS_CODE  0x0b
+#  endif
+#endif
+
+#ifdef __50SERIES /* Prime/PRIMOS */
+#  define OS_CODE  0x0f
+#endif
+
+#if defined(_BEOS_) || defined(RISCOS)
+#  define fdopen(fd,mode) NULL /* No fdopen() */
+#endif
+
+#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX
+#  if defined(_WIN32_WCE)
+#    define fdopen(fd,mode) NULL /* No fdopen() */
+#    ifndef _PTRDIFF_T_DEFINED
+       typedef int ptrdiff_t;
+#      define _PTRDIFF_T_DEFINED
+#    endif
+#  else
+#    define fdopen(fd,type) _fdopen(fd,type)
+#  endif
+#endif
+
+#if defined(__BORLANDC__) && !defined(MSDOS)
+  #pragma warn -8004
+  #pragma warn -8008
+  #pragma warn -8066
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_WIN32) && \
+    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
+  ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+  ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+#endif
+
+        /* common defaults */
+
+#ifndef OS_CODE
+#  define OS_CODE  0x03 /* assume Unix */
+#endif
+
+#ifndef F_OPEN
+#  define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+        /* functions */
+
+#if defined(pyr) || defined(Z_SOLO)
+#  define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+  * You may have to use the same strategy for Borland C (untested).
+  * The __SC__ check is for Symantec.
+  */
+#  define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+#  define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+#  ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+#    define zmemcpy _fmemcpy
+#    define zmemcmp _fmemcmp
+#    define zmemzero(dest, len) _fmemset(dest, 0, len)
+#  else
+#    define zmemcpy memcpy
+#    define zmemcmp memcmp
+#    define zmemzero(dest, len) memset(dest, 0, len)
+#  endif
+#else
+   void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
+   int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
+   void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len));
+#endif
+
+/* Diagnostic functions */
+#ifdef DEBUG
+#  include <stdio.h>
+   extern int ZLIB_INTERNAL z_verbose;
+   extern void ZLIB_INTERNAL z_error OF((char *m));
+#  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+#  define Trace(x) {if (z_verbose>=0) fprintf x ;}
+#  define Tracev(x) {if (z_verbose>0) fprintf x ;}
+#  define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+#ifndef Z_SOLO
+   voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items,
+                                    unsigned size));
+   void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr));
+#endif
+
+#define ZALLOC(strm, items, size) \
+           (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
+
+/* Reverse the bytes in a 32-bit value */
+#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+                    (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
+
+#endif /* ZUTIL_H */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/.gitignore b/c-blosc/internal-complibs/zstd-1.3.4/.gitignore
new file mode 100644
index 0000000..4cd50ac
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/.gitignore
@@ -0,0 +1,3 @@
+# make install artefact
+libzstd.pc
+libzstd-nomt
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/BUCK b/c-blosc/internal-complibs/zstd-1.3.4/BUCK
new file mode 100644
index 0000000..dbe8885
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/BUCK
@@ -0,0 +1,220 @@
+cxx_library(
+    name='zstd',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    deps=[
+        ':common',
+        ':compress',
+        ':decompress',
+        ':deprecated',
+    ],
+)
+
+cxx_library(
+    name='compress',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('compress', 'zstd*.h'),
+    ]),
+    srcs=glob(['compress/zstd*.c']),
+    deps=[':common'],
+)
+
+cxx_library(
+    name='decompress',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    headers=subdir_glob([
+        ('decompress', '*_impl.h'),
+    ]),
+    srcs=glob(['decompress/zstd*.c']),
+    deps=[
+        ':common',
+        ':legacy',
+    ],
+)
+
+cxx_library(
+    name='deprecated',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('deprecated', '*.h'),
+    ]),
+    srcs=glob(['deprecated/*.c']),
+    deps=[':common'],
+)
+
+cxx_library(
+    name='legacy',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('legacy', '*.h'),
+    ]),
+    srcs=glob(['legacy/*.c']),
+    deps=[':common'],
+    exported_preprocessor_flags=[
+        '-DZSTD_LEGACY_SUPPORT=4',
+    ],
+)
+
+cxx_library(
+    name='zdict',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
('dictBuilder', 'zdict.h'), + ]), + headers=subdir_glob([ + ('dictBuilder', 'divsufsort.h'), + ]), + srcs=glob(['dictBuilder/*.c']), + deps=[':common'], +) + +cxx_library( + name='compiler', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'compiler.h'), + ]), +) + +cxx_library( + name='cpu', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'cpu.h'), + ]), +) + +cxx_library( + name='bitstream', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'bitstream.h'), + ]), +) + +cxx_library( + name='entropy', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'fse.h'), + ('common', 'huf.h'), + ]), + srcs=[ + 'common/entropy_common.c', + 'common/fse_decompress.c', + 'compress/fse_compress.c', + 'compress/huf_compress.c', + 'decompress/huf_decompress.c', + ], + deps=[ + ':bitstream', + ':compiler', + ':errors', + ':mem', + ], +) + +cxx_library( + name='errors', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'error_private.h'), + ('common', 'zstd_errors.h'), + ]), + srcs=['common/error_private.c'], +) + +cxx_library( + name='mem', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'mem.h'), + ]), +) + +cxx_library( + name='pool', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'pool.h'), + ]), + srcs=['common/pool.c'], + deps=[ + ':threading', + ':zstd_common', + ], +) + +cxx_library( + name='threading', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'threading.h'), + ]), + srcs=['common/threading.c'], + exported_preprocessor_flags=[ + '-DZSTD_MULTITHREAD', + ], + exported_linker_flags=[ + '-pthread', + ], +) + +cxx_library( + name='xxhash', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'xxhash.h'), + ]), + srcs=['common/xxhash.c'], + exported_preprocessor_flags=[ + '-DXXH_NAMESPACE=ZSTD_', + ], +) + +cxx_library( + name='zstd_common', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('', 'zstd.h'), + ('common', 'zstd_internal.h'), + ]), + srcs=['common/zstd_common.c'], + deps=[ + ':compiler', + ':errors', + ':mem', + ], +) + +cxx_library( + name='common', + deps=[ + ':bitstream', + ':compiler', + ':cpu', + ':entropy', + ':errors', + ':mem', + ':pool', + ':threading', + ':xxhash', + ':zstd_common', + ] +) diff --git a/c-blosc/internal-complibs/zstd-1.3.4/Makefile b/c-blosc/internal-complibs/zstd-1.3.4/Makefile new file mode 100644 index 0000000..cdfdc5c --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/Makefile @@ -0,0 +1,189 @@ +# ################################################################ +# Copyright (c) 2015-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). 
+# ################################################################ + +# Version numbers +LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h` +LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h` +LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h` +LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT) +LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT)) +LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT)) +LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT)) +LIBVER := $(shell echo $(LIBVER_SCRIPT)) +VERSION?= $(LIBVER) + +CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_ +CFLAGS ?= -O3 +DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CPPFLAGS) $(CFLAGS) + + +ZSTD_FILES := $(sort $(wildcard common/*.c compress/*.c decompress/*.c dictBuilder/*.c deprecated/*.c)) + +ZSTD_LEGACY_SUPPORT ?= 4 + +ifneq ($(ZSTD_LEGACY_SUPPORT), 0) +ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0) + ZSTD_FILES += $(shell ls legacy/*.c | grep 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') +endif + CPPFLAGS += -I./legacy +endif +CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) + +ZSTD_OBJ := $(patsubst %.c,%.o,$(ZSTD_FILES)) + +# OS X linker doesn't support -soname, and use different extension +# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html +ifeq ($(shell uname), Darwin) + SHARED_EXT = dylib + SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) + SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) + SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER) +else + SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR) + SHARED_EXT = so + SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR) + SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER) +endif + +LIBZSTD = libzstd.$(SHARED_EXT_VER) + + +.PHONY: default all clean install uninstall + +default: lib-release + +all: lib + +libzstd.a: ARFLAGS = rcs +libzstd.a: $(ZSTD_OBJ) + @echo compiling static library + @$(AR) $(ARFLAGS) $@ $^ + +libzstd.a-mt: CPPFLAGS += -DZSTD_MULTITHREAD +libzstd.a-mt: libzstd.a + +$(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden +$(LIBZSTD): $(ZSTD_FILES) + @echo compiling dynamic library $(LIBVER) +ifneq (,$(filter Windows%,$(OS))) + @$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -shared $^ -o dll\libzstd.dll + dlltool -D dll\libzstd.dll -d dll\libzstd.def -l dll\libzstd.lib +else + @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ + @echo creating versioned links + @ln -sf $@ libzstd.$(SHARED_EXT_MAJOR) + @ln -sf $@ libzstd.$(SHARED_EXT) +endif + +libzstd : $(LIBZSTD) + +libzstd-mt : CPPFLAGS += -DZSTD_MULTITHREAD +libzstd-mt : libzstd + +lib: libzstd.a libzstd + +lib-mt: CPPFLAGS += -DZSTD_MULTITHREAD +lib-mt: lib + +lib-release lib-release-mt: DEBUGFLAGS := +lib-release: lib +lib-release-mt: lib-mt + +# Special case : building library in single-thread mode _and_ without zstdmt_compress.c +ZSTDMT_FILES = compress/zstdmt_compress.c +ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES)) 
+libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden +libzstd-nomt: $(ZSTD_NOMT_FILES) + @echo compiling single-thread dynamic library $(LIBVER) + @echo files : $(ZSTD_NOMT_FILES) + @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ + +clean: + @$(RM) -r *.dSYM # Mac OS-X specific + @$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc + @$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt* + @$(RM) common/*.o compress/*.o decompress/*.o dictBuilder/*.o legacy/*.o deprecated/*.o + @echo Cleaning library completed + +#----------------------------------------------------------------------------- +# make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets +#----------------------------------------------------------------------------- +ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) + +DESTDIR ?= +# directory variables : GNU conventions prefer lowercase +# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html +# support both lower and uppercase (BSD), use uppercase in script +prefix ?= /usr/local +PREFIX ?= $(prefix) +exec_prefix ?= $(PREFIX) +libdir ?= $(exec_prefix)/lib +LIBDIR ?= $(libdir) +includedir ?= $(PREFIX)/include +INCLUDEDIR ?= $(includedir) + +ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly)) +PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig +else +PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig +endif + +ifneq (,$(filter $(shell uname),SunOS)) +INSTALL ?= ginstall +else +INSTALL ?= install +endif + +INSTALL_PROGRAM ?= $(INSTALL) +INSTALL_DATA ?= $(INSTALL) -m 644 + + +libzstd.pc: +libzstd.pc: libzstd.pc.in + @echo creating pkgconfig + @sed -e 's|@PREFIX@|$(PREFIX)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + $< >$@ + +install: libzstd.a libzstd libzstd.pc + @$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/ $(DESTDIR)$(INCLUDEDIR)/ + @$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/ + @echo Installing libraries + @$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR) + @$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR) + @ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) + @ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT) + @echo Installing includes + @$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR) + @$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR) + @$(INSTALL_DATA) deprecated/zbuff.h $(DESTDIR)$(INCLUDEDIR) # prototypes generate deprecation warnings + @$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR) + @echo zstd static and shared library installed + +uninstall: + @$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a + @$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT) + @$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) + @$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD) + @$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc + @$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h + @$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h + @$(RM) $(DESTDIR)$(INCLUDEDIR)/zbuff.h # Deprecated streaming functions + @$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h + @echo zstd libraries successfully uninstalled + +endif diff --git a/c-blosc/internal-complibs/zstd-1.3.4/README.md b/c-blosc/internal-complibs/zstd-1.3.4/README.md new file mode 100644 index 0000000..95196e4 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/README.md @@ -0,0 +1,115 @@ +Zstandard library files +================================ + +The __lib__ directory is split into several sub-directories, +in order to make it easier to select or exclude 
features.
+
+
+#### Building
+
+A `Makefile` is provided, supporting all standard [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions),
+including command variables, staged install, directory variables and standard targets.
+- `make` : generates both static and dynamic libraries
+- `make install` : installs the libraries in the default system directories
+
+The default `libzstd` scope includes compression, decompression, dictionary building,
+and decoding support for legacy formats >= v0.4.0.
+
+
+#### API
+
+Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
+
+
+#### Advanced API
+
+Optional advanced features are exposed via :
+
+- `lib/common/zstd_errors.h` : translates `size_t` function results
+ into a `ZSTD_ErrorCode`, for accurate error handling.
+- `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
+ it unlocks access to the advanced experimental API,
+ exposed in the second part of `zstd.h`.
+ These APIs are not "stable" : their definitions may change in the future.
+ As a consequence, they shall ___never be used with a dynamic library___ !
+ Only static linking is allowed.
+
+
+#### Modular build
+
+It's possible to compile only a limited set of features.
+
+- Directory `lib/common` is always required, for all variants.
+- Compression source code lies in `lib/compress`.
+- Decompression source code lies in `lib/decompress`.
+- It's possible to include only `compress` or only `decompress` : they don't depend on each other.
+- `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
+ The API is exposed in `lib/dictBuilder/zdict.h`.
+ This module depends on both `lib/common` and `lib/compress`.
+- `lib/legacy` : source code to decompress legacy zstd formats, starting from `v0.1.0`.
+ This module depends on `lib/common` and `lib/decompress`.
+ To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
+ Typically, with `gcc`, add the argument `-DZSTD_LEGACY_SUPPORT=1`.
+ A higher number restricts the range of supported versions :
+ for example, `ZSTD_LEGACY_SUPPORT=2` means "support legacy formats >= v0.2.0",
+ `ZSTD_LEGACY_SUPPORT=3` means "support legacy formats >= v0.3.0", and so on.
+ Starting with v0.8.0, all versions of `zstd` produce frames compliant with the specification,
+ so `ZSTD_LEGACY_SUPPORT=8` (or more) doesn't trigger legacy support.
+ Conversely, `ZSTD_LEGACY_SUPPORT=0` means "do __not__ support legacy formats".
+ Once enabled, this capability is triggered transparently within the decompression functions.
+ It's also possible to invoke the legacy API directly, as exposed in `lib/legacy/zstd_legacy.h`.
+ Each version also provides a dedicated set of advanced API :
+ for example, the advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h`.
+ Note : `lib/legacy` only supports _decoding_ legacy formats.
+
+
+#### Multithreading support
+
+Multithreading is disabled by default when building with `make`.
+Enabling multithreading requires two conditions :
+- set the macro `ZSTD_MULTITHREAD`
+- on POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
+
+Both conditions are automatically triggered by the `make lib-mt` target.
+Note that, when linking a POSIX program with a multithreaded version of `libzstd`,
+it's necessary to pass the `-pthread` flag at the link stage as well.
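+
+As a concrete illustration, here is a minimal sketch of multithreaded compression
+through the experimental advanced API described below. The names
+`ZSTD_CCtx_setParameter()`, `ZSTD_p_compressionLevel`, `ZSTD_p_nbWorkers` and
+`ZSTD_compress_generic()` follow this 1.3.4 snapshot and may differ in other versions;
+the worker setting is simply a no-op when `libzstd` was built without `ZSTD_MULTITHREAD`.
+```
+#define ZSTD_STATIC_LINKING_ONLY   /* unlock the experimental API */
+#include <zstd.h>
+#include <stdio.h>
+
+int main(void)
+{
+    const char src[] = "data worth compressing, data worth compressing";
+    char dst[256];
+    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+    ZSTD_inBuffer  input  = { src, sizeof(src), 0 };
+    ZSTD_outBuffer output = { dst, sizeof(dst), 0 };
+
+    ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 3);
+    ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, 2);   /* ignored in single-thread builds */
+
+    /* ZSTD_e_end : compress and flush the whole frame in a single call */
+    {   size_t const remaining = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_end);
+        if (ZSTD_isError(remaining) || remaining != 0) {   /* 0 == frame fully flushed */
+            fprintf(stderr, "compression failed or dst too small\n");
+            ZSTD_freeCCtx(cctx);
+            return 1;
+    }   }
+    printf("compressed %u -> %u bytes\n", (unsigned)sizeof(src), (unsigned)output.pos);
+    ZSTD_freeCCtx(cctx);
+    return 0;
+}
+```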
+
+Multithreading capabilities are exposed via the
+[advanced API `ZSTD_compress_generic()`, defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/dev/lib/zstd.h#L919).
+This API is still considered experimental,
+but is expected to become "stable" at some point in the future.
+
+
+#### Windows : using MinGW+MSYS to create a DLL
+
+A DLL can be created using MinGW+MSYS with the `make libzstd` command.
+This command creates `dll\libzstd.dll` and the import library `dll\libzstd.lib`.
+The import library is only required with Visual C++.
+The header file `zstd.h` and the dynamic library `dll\libzstd.dll` are required to
+compile a project using gcc/MinGW.
+The dynamic library has to be added to the linking options.
+This means that if a project using ZSTD consists of a single `test-dll.c`
+file, it should be linked with `dll\libzstd.dll`. For example:
+```
+ gcc $(CFLAGS) -Iinclude/ test-dll.c -o test-dll dll\libzstd.dll
+```
+The compiled executable will require the ZSTD DLL, available at `dll\libzstd.dll`.
+
+
+#### Deprecated API
+
+Obsolete APIs on their way out are stored in the directory `lib/deprecated`.
+At this stage, it contains older streaming prototypes, in `lib/deprecated/zbuff.h`.
+These prototypes will be removed in some future version.
+Consider migrating code towards the supported streaming API exposed in `zstd.h`.
+
+
+#### Miscellaneous
+
+The other files are not source code. They are :
+
+ - `LICENSE` : contains the BSD license text
+ - `Makefile` : `make` script to build and install the zstd library (static and dynamic)
+ - `BUCK` : support for the `buck` build system (https://buckbuild.com/)
+ - `libzstd.pc.in` : for `pkg-config` (used in `make install`)
+ - `README.md` : this file
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/bitstream.h b/c-blosc/internal-complibs/zstd-1.3.4/common/bitstream.h
new file mode 100644
index 0000000..f7f389f
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/common/bitstream.h
@@ -0,0 +1,471 @@
+/* ******************************************************************
+ bitstream
+ Part of FSE library
+ header file (to include)
+ Copyright (C) 2013-2017, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*
+* This API consists of small unitary functions, which must be inlined for best performance.
+* Since link-time-optimization is not available for all compilers,
+* these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include "mem.h" /* unaligned access routines */
+#include "error_private.h" /* error codes and messages */
+
+
+/*-*************************************
+* Debug
+***************************************/
+#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
+# include <assert.h>
+#else
+# ifndef assert
+# define assert(condition) ((void)0)
+# endif
+#endif
+
+
+/*=========================================
+* Target specific
+=========================================*/
+#if defined(__BMI__) && defined(__GNUC__)
+# include <immintrin.h> /* support for bextr (experimental) */
+#endif
+
+#define STREAM_ACCUMULATOR_MIN_32 25
+#define STREAM_ACCUMULATOR_MIN_64 57
+#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+
+/*-******************************************
+* bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
+typedef struct
+{
+ size_t bitContainer;
+ unsigned bitPos;
+ char* startPtr;
+ char* ptr;
+ char* endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of the buffer to write into.
+* bitStream will never write outside of this buffer.
+* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
+*
+* bits are first added to a local register.
+* The local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+* Writing data into memory is an explicit operation, performed by the flushBits function.
+* Hence, keep track of how many bits are potentially stored in the local register to avoid register overflow.
+* After a flushBits, a maximum of 7 bits might still be stored in the local register.
+*
+* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+* The last operation is to close the bitStream.
+* The function returns the final size of CStream in bytes.
+* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! 
BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + assert(bitC->ptr <= bitC->endPtr); + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. 
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ +# if defined(__x86_64__) + if (sizeof(bitContainer)==8) + return _bextr_u64(bitContainer, start, nbBits); + else +# endif + return _bextr_u32(bitContainer, start, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> start) & BIT_mask[nbBits]; +#endif +} + +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! 
BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/compiler.h b/c-blosc/internal-complibs/zstd-1.3.4/common/compiler.h new file mode 100644 index 0000000..e90a3bc --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/compiler.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+/*-*******************************************************
+* Compiler specifics
+*********************************************************/
+/* force inlining */
+#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# define INLINE_KEYWORD inline
+#else
+# define INLINE_KEYWORD
+#endif
+
+#if defined(__GNUC__)
+# define FORCE_INLINE_ATTR __attribute__((always_inline))
+#elif defined(_MSC_VER)
+# define FORCE_INLINE_ATTR __forceinline
+#else
+# define FORCE_INLINE_ATTR
+#endif
+
+/**
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/**
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compiler's
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefits tremendously from the always_inline
+ * attribute.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+# define HINT_INLINE static INLINE_KEYWORD
+#else
+# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#endif
+
+/* force no inlining */
+#ifdef _MSC_VER
+# define FORCE_NOINLINE static __declspec(noinline)
+#else
+# ifdef __GNUC__
+# define FORCE_NOINLINE static __attribute__((__noinline__))
+# else
+# define FORCE_NOINLINE static
+# endif
+#endif
+
+/* target attribute */
+#ifndef __has_attribute
+ #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
+#endif
+#if defined(__GNUC__)
+# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+#else
+# define TARGET_ATTRIBUTE(target)
+#endif
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+ #if (defined(__clang__) && __has_attribute(__target__)) \
+ || (defined(__GNUC__) \
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
+ && (defined(__x86_64__) || defined(_M_X86)) \
+ && !defined(__BMI2__)
+ # define DYNAMIC_BMI2 1
+ #else
+ # define DYNAMIC_BMI2 0
+ #endif
+#endif
+
+/* prefetch */
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
+#elif defined(__GNUC__)
+# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
+#else
+# define PREFETCH(ptr) /* disabled */
+#endif
+
+/* disable warnings */
+#ifdef _MSC_VER /* Visual Studio */
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#endif
+
+#endif /* ZSTD_COMPILER_H */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/cpu.h b/c-blosc/internal-complibs/zstd-1.3.4/common/cpu.h
new file mode 100644
index 0000000..4eb48e3
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/common/cpu.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include <string.h>
+
+#include "mem.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+typedef struct {
+ U32 f1c;
+ U32 f1d;
+ U32 f7b;
+ U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+ U32 f1c = 0;
+ U32 f1d = 0;
+ U32 f7b = 0;
+ U32 f7c = 0;
+#ifdef _MSC_VER
+ int reg[4];
+ __cpuid((int*)reg, 0);
+ {
+ int const n = reg[0];
+ if (n >= 1) {
+ __cpuid((int*)reg, 1);
+ f1c = (U32)reg[2];
+ f1d = (U32)reg[3];
+ }
+ if (n >= 7) {
+ __cpuidex((int*)reg, 7, 0);
+ f7b = (U32)reg[1];
+ f7c = (U32)reg[2];
+ }
+ }
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+ /* The following block is like the normal cpuid branch below, but gcc
+ * reserves ebx for use as its PIC register, so we must specially
+ * handle the save and restore to avoid clobbering the register
+ */
+ U32 n;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1)
+ :);
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %%eax\n\r"
+ "popl %%ebx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+ U32 n;
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+ }
+ if (n >= 7) {
+ U32 f7a;
+ __asm__("cpuid"
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#endif
+ {
+ ZSTD_cpuid_t cpuid;
+ cpuid.f1c = f1c;
+ cpuid.f1d = f1d;
+ cpuid.f7b = f7b;
+ cpuid.f7c = f7c;
+ return cpuid;
+ }
+}
+
+#define X(name, r, bit) \
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
+ return ((cpuid.r) & (1U << bit)) != 0; \
+ }
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)
+ C(sse3, 0)
+ C(pclmuldq, 1)
+ C(dtes64, 2)
+ C(monitor, 3)
+ C(dscpl, 4)
+ C(vmx, 5)
+ C(smx, 6)
+ C(eist, 7)
+ C(tm2, 8)
+ C(ssse3, 9)
+ C(cnxtid, 10)
+ C(fma, 12)
+ C(cx16, 13)
+ C(xtpr, 14)
+ C(pdcm, 15)
+ C(pcid, 17)
+ C(dca, 18)
+ C(sse41, 19)
+ C(sse42, 20)
+ C(x2apic, 21)
+ C(movbe, 22)
+ C(popcnt, 23)
+ C(tscdeadline, 24)
+ C(aes, 25)
+ C(xsave, 26)
+ C(osxsave, 27)
+ C(avx, 28)
+ C(f16c, 29)
+ C(rdrand, 30)
+#undef C
+#define D(name, bit) X(name, f1d, bit)
+ D(fpu, 0)
+ D(vme, 1)
+ D(de, 2)
+ D(pse, 3)
+ D(tsc, 4)
+ D(msr, 5)
+ D(pae, 6)
+ D(mce, 7)
+ D(cx8, 8)
+ D(apic, 9)
+ D(sep, 11)
+ D(mtrr, 12)
+ D(pge, 13)
+ D(mca, 14)
+ D(cmov, 15)
+ D(pat, 16)
+ D(pse36, 17)
+ D(psn, 18)
+ D(clfsh, 19)
+ D(ds, 21)
+ D(acpi, 22)
+ D(mmx, 23)
+ D(fxsr, 24)
+ D(sse, 25)
+ D(sse2, 26)
+ D(ss, 27)
+ D(htt, 28)
+ D(tm, 29)
+ D(pbe, 31)
+#undef D
+
+/* cpuid(7): Extended Features.
*/ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/entropy_common.c b/c-blosc/internal-complibs/zstd-1.3.4/common/entropy_common.c new file mode 100644 index 0000000..b37a082 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/entropy_common.c @@ -0,0 +1,221 @@ +/* + Common functions of New Generation Entropy library + Copyright (C) 2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+ You can contact the author at :
+ - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*************************************************************************** */
+
+/* *************************************
+* Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h" /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
+#include "huf.h"
+
+
+/*=== Version ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*=== Error Management ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ int previous0 = 0;
+
+ if (hbSize < 4) return ERROR(srcSize_wrong);
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ while ((remaining>1) & (charnum<=*maxSVPtr)) {
+ if (previous0) {
+ unsigned n0 = charnum;
+ while ((bitStream & 0xFFFF) == 0xFFFF) {
+ n0 += 24;
+ if (ip < iend-5) {
+ ip += 2;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ } else {
+ bitStream >>= 16;
+ bitCount += 16;
+ } }
+ while ((bitStream & 3) == 3) {
+ n0 += 3;
+ bitStream >>= 2;
+ bitCount += 2;
+ }
+ n0 += bitStream & 3;
+ bitCount += 2;
+ if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+ while (charnum < n0) normalizedCounter[charnum++] = 0;
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ } else {
+ bitStream >>= 2;
+ } }
+ { int const max = (2*threshold-1) - remaining;
+ int count;
+
+ if ((bitStream & (threshold-1)) < (U32)max) {
+ count = bitStream & (threshold-1);
+ bitCount += nbBits-1;
+ } else {
+ count = bitStream & (2*threshold-1);
+ if (count >= threshold) count -= max;
+ bitCount += nbBits;
+ }
+
+ count--; /* extra accuracy */
+ remaining -= count < 0 ? -count : count; /* -1 means +1 */
+ normalizedCounter[charnum++] = (short)count;
+ previous0 = !count;
+ while (remaining < threshold) {
+ nbBits--;
+ threshold >>= 1;
+ }
+
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+ ip += bitCount>>3;
+ bitCount &= 7;
+ } else {
+ bitCount -= (int)(8 * (iend - 4 - ip));
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+ } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
+ if (remaining != 1) return ERROR(corruption_detected);
+ if (bitCount > 32) return ERROR(corruption_detected);
+ *maxSVPtr = charnum-1;
+
+ ip += (bitCount+7)>>3;
+ return ip-istart;
+}
+
+
+/*! HUF_readStats() :
+ Read compact Huffman tree, saved by HUF_writeCTable().
+ `huffWeight` is destination buffer.
+ `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
+ @return : size read from `src` , or an error Code .
+ Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
+*/
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize)
+{
+ U32 weightTotal;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize;
+ size_t oSize;
+
+ if (!srcSize) return ERROR(srcSize_wrong);
+ iSize = ip[0];
+ /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
+
+ if (iSize >= 128) { /* special header */
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ if (oSize >= hwSize) return ERROR(corruption_detected);
+ ip += 1;
+ { U32 n;
+ for (n=0; n<oSize; n+=2) {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ } } }
+ else { /* header compressed with FSE (normal case) */
+ FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
+ if (FSE_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+ weightTotal = 0;
+ { U32 n; for (n=0; n<oSize; n++) {
+ if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ } }
+ if (weightTotal == 0) return ERROR(corruption_detected);
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+ if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+ *tableLogPtr = tableLog;
+ /* determine last weight */
+ { U32 const total = 1 << tableLog;
+ U32 const rest = total - weightTotal;
+ U32 const verif = 1 << BIT_highbit32(rest);
+ U32 const lastWeight = BIT_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ } }
+
+ /* check tree construction validity */
+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* results */
+ *nbSymbolsPtr = (U32)(oSize+1);
+ return iSize+1;
+}
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/error_private.c b/c-blosc/internal-complibs/zstd-1.3.4/common/error_private.c
new file mode 100644
index 0000000..d004ee6
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/common/error_private.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +#include "error_private.h" + +const char* ERR_getErrorString(ERR_enum code) +{ + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(maxCode): + default: return notErrorCode; + } +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/error_private.h b/c-blosc/internal-complibs/zstd-1.3.4/common/error_private.h new file mode 100644 index 0000000..0d2fa7e --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/error_private.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+* Dependencies
+******************************************/
+#include <stddef.h> /* size_t */
+#include "zstd_errors.h" /* enum list */
+
+
+/* ****************************************
+* Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+# define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+# define ERR_STATIC static __inline
+#else
+# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+* Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+* Error codes handling
+******************************************/
+#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+
+/*-****************************************
+* Error Strings
+******************************************/
+
+const char* ERR_getErrorString(ERR_enum code); /* error_private.c */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+ return ERR_getErrorString(ERR_getErrorCode(code));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/fse.h b/c-blosc/internal-complibs/zstd-1.3.4/common/fse.h
new file mode 100644
index 0000000..6a1d272
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/common/fse.h
@@ -0,0 +1,704 @@
+/* ******************************************************************
+ FSE : Finite State Entropy codec
+ Public Prototypes declaration
+ Copyright (C) 2013-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef FSE_H
+#define FSE_H
+
+
+/*-*****************************************
+* Dependencies
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+
+
+/*-*****************************************
+* FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+# define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
+# define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required, but allows generating better code, saving a function pointer load from the IAT and an indirect jump. */
+#else
+# define FSE_PUBLIC_API
+#endif
+
+/*------ Version ------*/
+#define FSE_VERSION_MAJOR 0
+#define FSE_VERSION_MINOR 9
+#define FSE_VERSION_RELEASE 0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
+
+/*-****************************************
+* FSE simple functions
+******************************************/
+/*! FSE_compress() :
+ Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+ 'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
+ @return : size of compressed data (<= dstCapacity).
+ Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+ if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+ Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+ into already allocated destination buffer 'dst', of size 'dstCapacity'.
+ @return : size of regenerated data (<= maxDstSize),
+ or an error code, which can be tested using FSE_isError() .
+
+ ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+ Why ? : making this distinction requires a header.
+ Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_count(): + Provides the precise count of each byte within a table 'count'. + 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + *maxSymbolValuePtr will be updated if detected smaller than initial value. + @return : the count of the most frequent symbol (which is not identified). + if return == srcSize, there is only one symbol. + Can also return an error code, which can be tested with FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + +/*! 
FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. 
+For guaranteed success, buffer size must be at least FSE_NCountWriteBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'.
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSE_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+
+/*! Constructor and Destructor of FSE_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+    Builds 'dt', which must be already allocated, using FSE_createDTable().
+    return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
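+
+As a minimal sketch of this first step (an editorial addition, not upstream text;
+the 256-entry worst case follows the note above) :
+    short normalizedCounter[256];
+    unsigned maxSymbolValue = 255;
+    unsigned tableLog;
+    size_t const hSize = FSE_readNCount(normalizedCounter, &maxSymbolValue, &tableLog, rBuffer, rBufferSize);
+    if (FSE_isError(hSize)) return hSize;   // header not decodable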
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+#endif  /* FSE_H */
+
+#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY
+
+/* *** Dependency *** */
+#include "bitstream.h"
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+#define FSE_DTABLE_SIZE(maxTableLog)                       (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
+
+/* *****************************************
+*  FSE advanced API
+*******************************************/
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned
+ */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* source, size_t sourceSize, unsigned* workSpace);
+
+/** FSE_countFast() :
+ *  same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
+ */
+size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of minimum `1024` unsigned
+ */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
+
+/*! FSE_count_simple() :
+ * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
+*/
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/**< same as FSE_optimalTableLog(), which used `minus==2` */
+
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ?
(1 << (maxTableLog - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `(1<<tableLog)`.
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+
+typedef enum {
+   FSE_repeat_none,  /**< Cannot use the previous table */
+   FSE_repeat_check, /**< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
+
+/* *****************************************
+*  FSE symbol compression API
+*******************************************/
+/*!
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   Hence their body are included in next section.
+*/
+typedef struct {
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, FSE_CState_t* statePtr);
+
+/**<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+    FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeByte(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return an error code (which can be tested using FSE_isError())
+If there is an error, it returns 0.
+    size_t size = BIT_closeCStream(&bitStream);
+*/
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct {
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/**<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits is 'tableLog'.
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_overflow : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+typedef struct {
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+    const void* ptr = ct;
+    const U16* u16ptr = (const U16*) ptr;
+    const U32 tableLog = MEM_read16(ptr);
+    statePtr->value = (ptrdiff_t)1<<tableLog;
+    statePtr->stateTable = u16ptr+2;
+    statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
+    statePtr->stateLog = tableLog;
+}
+
+
+/*!
FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() :
+    unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/fse_decompress.c b/c-blosc/internal-complibs/zstd-1.3.4/common/fse_decompress.c
new file mode 100644
index 0000000..4c66c3b
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/common/fse_decompress.c
@@ -0,0 +1,309 @@
+/* ******************************************************************
+   FSE : Finite State Entropy decoder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include "bitstream.h"
+#include "compiler.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "error_private.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+/* check and forward error code */
+#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{
+    free(dt);
+}
+
+size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    {   FSE_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    {   U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s,
position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSV1 = tableMask+1;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<maxSV1; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+
+    /* Init */
+    CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state1);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state2);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+
+    /* normal FSE decoding mode */
+    size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(NCountLength)) return NCountLength;
+    //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
+    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+    ip += NCountLength;
+    cSrcSize -= NCountLength;
+
+    CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
+
+    return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace);   /* always return, even if it is an error code */
+}
+
+
+typedef FSE_DTable
DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
+{
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
+}
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/huf.h b/c-blosc/internal-complibs/zstd-1.3.4/common/huf.h
new file mode 100644
index 0000000..b4645b4
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/common/huf.h
@@ -0,0 +1,327 @@
+/* ******************************************************************
+   Huffman coder, part of New Generation Entropy library
+   header file
+   Copyright (C) 2013-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+/* *** Dependencies *** */
+#include <stddef.h>    /* size_t */
+
+
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* ***  simple functions  *** */
+/* ========================== */
+
+/** HUF_compress() :
+ *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. 
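+ *
+ * As a caller-side sketch of the stable API above (an editorial illustration,
+ * not upstream text; the 64 KB block size is an arbitrary assumption, kept
+ * under HUF_BLOCKSIZE_MAX) :
+ *     size_t const bound = HUF_compressBound(64*1024);
+ *     size_t const cSize = HUF_compress(cBuf, bound, src, 64*1024);
+ *     if (HUF_isError(cSize)) { }   // compression failed
+ *     else if (cSize == 0) { }      // not compressible : store src verbatim
+ *     else { }                      // store cBuf and the original size for HUF_decompress()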
+ * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +#include "mem.h" /* U32 */ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" +#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ + void* name##hv = &(name##hb); \ + HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* 
workSpace, size_t wkspSize); /**< double-symbols decoder */ + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . 
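+ * A reading sketch (editorial, not upstream; buffer sizes are worst-case assumptions) :
+ *     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+ *     U32  rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1];
+ *     U32  nbSymbols, tableLog;
+ *     size_t const hSize = HUF_readStats(huffWeight, sizeof(huffWeight),
+ *                                        rankStats, &nbSymbols, &tableLog, src, srcSize);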
*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. + */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
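+ * A usage sketch (editorial, not upstream; HUF_SYMBOLVALUE_MAX and
+ * HUF_TABLELOG_DEFAULT are illustrative choices) :
+ *     HUF_CREATE_STATIC_CTABLE(hufTable, HUF_SYMBOLVALUE_MAX);   // kept across blocks
+ *     HUF_repeat repeat = HUF_repeat_none;                       // no previous table yet
+ *     U32 wksp[HUF_WORKSPACE_SIZE_U32];
+ *     size_t const cSize = HUF_compress1X_repeat(dst, dstSize, src, srcSize,
+ *                              HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
+ *                              wksp, sizeof(wksp), hufTable, &repeat, 0, 0);  // preferRepeat=0, bmi2=0
+ *     // when the previous table is reused, 'repeat' stays valid for the next block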
*/ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); + +#endif /* HUF_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/mem.h b/c-blosc/internal-complibs/zstd-1.3.4/common/mem.h new file mode 100644 index 0000000..47d2300 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/mem.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>     /* size_t, ptrdiff_t */
+#include <string.h>     /* memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+#  define MEM_STATIC static __inline __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+/* code only tested on 32 and 64 bits systems */
+#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
+MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef   uint8_t BYTE;
+  typedef  uint16_t U16;
+  typedef   int16_t S16;
+  typedef  uint32_t U32;
+  typedef   int32_t S32;
+  typedef  uint64_t U64;
+  typedef   int64_t S64;
+#else
+  typedef unsigned char      BYTE;
+  typedef unsigned short     U16;
+  typedef   signed short     S16;
+  typedef unsigned int       U32;
+  typedef   signed int       S32;
+  typedef unsigned long long U64;
+  typedef   signed long long S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, 
(U32)val);
+    else
+        MEM_writeLE64(memPtr, (U64)val);
+}
+
+/*=== Big endian r/w ===*/
+
+MEM_STATIC U32 MEM_readBE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_swap32(MEM_read32(memPtr));
+    else
+        return MEM_read32(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
+{
+    if (MEM_isLittleEndian())
+        MEM_write32(memPtr, MEM_swap32(val32));
+    else
+        MEM_write32(memPtr, val32);
+}
+
+MEM_STATIC U64 MEM_readBE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_swap64(MEM_read64(memPtr));
+    else
+        return MEM_read64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
+{
+    if (MEM_isLittleEndian())
+        MEM_write64(memPtr, MEM_swap64(val64));
+    else
+        MEM_write64(memPtr, val64);
+}
+
+MEM_STATIC size_t MEM_readBEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readBE32(memPtr);
+    else
+        return (size_t)MEM_readBE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
+{
+    if (MEM_32bits())
+        MEM_writeBE32(memPtr, (U32)val);
+    else
+        MEM_writeBE64(memPtr, (U64)val);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/pool.c b/c-blosc/internal-complibs/zstd-1.3.4/common/pool.c
new file mode 100644
index 0000000..773488b
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/common/pool.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ======   Dependencies   ======= */
+#include <stddef.h>    /* size_t */
+#include "pool.h"
+#include "zstd_internal.h"  /* ZSTD_malloc, ZSTD_free */
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+#ifdef ZSTD_MULTITHREAD
+
+#include "threading.h"   /* pthread adaptation */
+
+/* A job is a function and an opaque argument */
+typedef struct POOL_job_s {
+    POOL_function function;
+    void *opaque;
+} POOL_job;
+
+struct POOL_ctx_s {
+    ZSTD_customMem customMem;
+    /* Keep track of the threads */
+    ZSTD_pthread_t *threads;
+    size_t numThreads;
+
+    /* The queue is a circular buffer */
+    POOL_job *queue;
+    size_t queueHead;
+    size_t queueTail;
+    size_t queueSize;
+
+    /* The number of threads working on jobs */
+    size_t numThreadsBusy;
+    /* Indicates if the queue is empty */
+    int queueEmpty;
+
+    /* The mutex protects the queue */
+    ZSTD_pthread_mutex_t queueMutex;
+    /* Condition variable for pushers to wait on when the queue is full */
+    ZSTD_pthread_cond_t queuePushCond;
+    /* Condition variables for poppers to wait on when the queue is empty */
+    ZSTD_pthread_cond_t queuePopCond;
+    /* Indicates if the queue is shutting down */
+    int shutdown;
+};
+
+/* POOL_thread() :
+   Work thread for the thread pool.
+   Waits for jobs and executes them.
+   @returns : NULL on failure else non-null.
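+
+   For orientation (an editorial note, not upstream text), a caller drives this
+   worker loop roughly as :
+       POOL_ctx* const pool = POOL_create(4, 8);   // 4 workers, queue of 8 jobs (example sizes)
+       POOL_add(pool, &fn, &arg);                  // fn matches the POOL_function type from pool.h;
+                                                   // blocks while the queue is full
+       POOL_free(pool);                            // joins all workers, then frees the context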
+*/ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while (ctx->queueEmpty && !ctx->shutdown) { + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* empty => shutting down: so stop */ + if (ctx->queueEmpty) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + if (ctx->queueSize == 1) { + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } } + } /* for (;;) */ + /* Unreachable */ +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check the parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate empty + * and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->numThreads = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->numThreads = i; + POOL_free(ctx); + return NULL; + } } + ctx->numThreads = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
+*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->numThreads; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_free(ctx->queue, ctx->customMem); + ZSTD_free(ctx->threads, ctx->customMem); + ZSTD_free(ctx, ctx->customMem); +} + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->numThreads * sizeof(ZSTD_pthread_t); +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * If the queueSize is 1 (the pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free and no job is waiting. + */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return ctx->numThreadsBusy == ctx->numThreads || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. 
*/ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_ctx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_ctx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_ctx); + (void)ctx; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_ctx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/pool.h b/c-blosc/internal-complibs/zstd-1.3.4/common/pool.h new file mode 100644 index 0000000..a57e9b4 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/pool.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +#include <stddef.h> /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +#include "zstd.h" + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem); + +/*! POOL_free() : + Free a thread pool returned by POOL_create(). +*/ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_sizeof() : + return memory usage of pool returned by POOL_create(). +*/ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + The function type that can be added to a thread pool. +*/ +typedef void (*POOL_function)(void*); +/*! POOL_add_function : + The function type for a generic thread pool add function. +*/ +typedef void (*POOL_add_function)(void*, POOL_function, void*); + +/*! POOL_add() : + Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + Possibly blocks until there is room in the queue. + Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed. +*/ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + Add the job `function(opaque)` to the thread pool if a worker is available. + return immediately otherwise. + @return : 1 if successful, 0 if not. 
+*/ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/threading.c b/c-blosc/internal-complibs/zstd-1.3.4/common/threading.c new file mode 100644 index 0000000..8be8c8d --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/threading.c @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +/** + * This file holds wrappers for systems which do not support pthreads + */ + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useles_symbol; + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + + +/* === Dependencies === */ +#include <process.h> /* _beginthreadex */ +#include <errno.h> /* errno, EINVAL */ +#include "threading.h" + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/threading.h b/c-blosc/internal-complibs/zstd-1.3.4/common/threading.h new file mode 100644 index 0000000..d806c89 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/threading.h @@ -0,0 +1,123 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#include <windows.h> +#undef ERROR +#define ERROR(name) ZSTD_ERROR(name) + + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include <pthread.h> + +#define ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + 
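/* Editorial sketch (illustration only, not part of the vendored sources): whichever
 * branch above is active, callers such as pool.c can use a single locking idiom.
 * ZSTD_pthread_mutex_* maps to CRITICAL_SECTION calls on Win32, to pthreads on
 * POSIX, and to no-ops in single-threaded builds.  The names below are hypothetical. */
#if 0 /* example only */
static ZSTD_pthread_mutex_t g_exampleLock;  /* init once with ZSTD_pthread_mutex_init(&g_exampleLock, NULL) */
static int g_exampleCounter;

static void example_increment(void)
{
    ZSTD_pthread_mutex_lock(&g_exampleLock);    /* EnterCriticalSection / pthread_mutex_lock / no-op */
    g_exampleCounter++;
    ZSTD_pthread_mutex_unlock(&g_exampleLock);  /* LeaveCriticalSection / pthread_mutex_unlock / no-op */
}
#endif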
+#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.c b/c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.c new file mode 100644 index 0000000..9d9c0e9 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.c @@ -0,0 +1,875 @@ +/* +* xxHash - Fast Hash algorithm +* Copyright (C) 2012-2016, Yann Collet +* +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The switch below allows selecting a different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violates the C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment the define below : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on little-endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. 
+ */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include <stdlib.h> +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/* for memcpy() */ +#include <string.h> +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +#include "xxhash.h" + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR + + +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p<bEnd) { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if 
((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +}
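/* Editorial sketch (illustration only, not part of the vendored sources): the
 * one-shot entry points defined above are the common fast path; the streaming
 * XXH*_reset/update/digest API below is only needed when the input arrives in
 * pieces.  Hashing a buffer with seed 0, the conventional default: */
#if 0 /* example only */
#include <stdio.h>

static void example_oneshot(const void* buf, size_t len)
{
    unsigned int       h32 = XXH32(buf, len, 0);  /* 32-bit digest */
    unsigned long long h64 = XXH64(buf, len, 0);  /* 64-bit digest, faster on 64-bit CPUs */
    printf("XXH32=%08x XXH64=%016llx\n", h32, h64);
}
#endif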
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + 
XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p<bEnd) { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; 
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.h b/c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.h new file mode 100644 index 0000000..9bad1f5 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/xxhash.h @@ -0,0 +1,305 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MurmurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +#include <stddef.h> /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid keeping it NULL and avoid numeric values). 
+ +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. +*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! 
State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. 
+ They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +# include "xxhash.c" /* include xxhash functions as `static`, for inlining */ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_common.c b/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_common.c new file mode 100644 index 0000000..bccc948 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_common.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/*-************************************* +* Dependencies +***************************************/ +#include <stdlib.h> /* malloc, calloc, free */ +#include <string.h> /* memset */ +#include "error_private.h" +#include "zstd_internal.h" + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +/*! ZSTD_isError() : + * tells if a return value is an error code */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + +/*! 
g_debuglog_enable : + * turn on/off debug traces (global switch) */ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2) +int g_debuglog_enable = 1; +#endif + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return malloc(size); +} + +void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + memset(ptr, 0, size); + return ptr; + } + return calloc(1, size); +} + +void ZSTD_free(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + free(ptr); + } +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_errors.h b/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_errors.h new file mode 100644 index 0000000..57533f2 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_errors.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
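+ * note 4 (illustrative sketch, with `r` standing for the result of any
+ *          size_t-returning zstd call) :
+ *            if (ZSTD_isError(r)) {
+ *                if (ZSTD_getErrorCode(r) == ZSTD_error_dstSize_tooSmall)
+ *                    (grow the destination buffer and retry)
+ *            }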
+ **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_internal.h b/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_internal.h new file mode 100644 index 0000000..65c08a8 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/common/zstd_internal.h @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. 
+ *  It also contains a few functions useful to at least 2 of them
+ *  and which benefit from being inlined */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "compiler.h"
+#include "mem.h"
+#include "error_private.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#ifndef XXH_STATIC_LINKING_ONLY
+#  define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
+#endif
+#include "xxhash.h"                 /* XXH_reset, update, digest */
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*-*************************************
+*  Debug
+***************************************/
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
+
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+#  include <stdio.h>
+extern int g_debuglog_enable;
+/* recommended values for ZSTD_DEBUG display levels :
+ * 1 : no display, enables assert() only
+ * 2 : reserved for currently active debug path
+ * 3 : events once per object lifetime (CCtx, CDict, etc.)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (*very* verbose) */
+#  define RAWLOG(l, ...) {                                       \
+                if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) {     \
+                    fprintf(stderr, __VA_ARGS__);                \
+            }   }
+#  define DEBUGLOG(l, ...) {                                     \
+                if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) {     \
+                    fprintf(stderr, __FILE__ ": " __VA_ARGS__);  \
+                    fprintf(stderr, " \n");                      \
+            }   }
+#else
+#  define RAWLOG(l, ...)   {}    /* disabled */
+#  define DEBUGLOG(l, ...) {}    /* disabled */
+#endif
+
+
+/*-*************************************
+*  shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ?
(a) : (b)) +#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ +#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */ + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +#define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */ +static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 +static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 3) /* GCC Intrinsic */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/fse_compress.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/fse_compress.c new file mode 100644 index 0000000..cb8f1fa --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/fse_compress.c @@ -0,0 +1,849 @@ +/* ****************************************************************** + FSE : Finite State Entropy encoder + Copyright (C) 2013-2015, Yann Collet. 
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "bitstream.h"
+#include "compiler.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "error_private.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
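+ * A hedged sketch of a conforming call (`ct`, `norm`, `maxSymbolValue` and
+ * `tableLog` are placeholders; FSE_MAX_TABLELOG comes from fse.h) :
+ *     BYTE wksp[1 << FSE_MAX_TABLELOG];
+ *     size_t const err = FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog,
+ *                                             wksp, sizeof(wksp));
+ *     if (FSE_isError(err)) (handle error)
+ * The worst-case sizing rule is stated next :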
+ * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; + + FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; + U32 highThreshold = tableSize-1; + + /* CTable header */ + if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u<=maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurences; + for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ + } } + + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ +} + +static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + bitStream = 0; + bitCount = 0; + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while (remaining>1) { /* stops at 1 */ + if (previous0) { + unsigned start = charnum; + while (!normalizedCounter[charnum]) charnum++; + while (charnum >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (charnum >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (charnum-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[charnum++]; + int const max = (2*threshold-1)-remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); +} + + + +/*-************************************************************** +* Counting histogram +****************************************************************/ +/*! FSE_count_simple + This function counts byte values within `src`, and store the histogram into table `count`. + It doesn't use any additional memory. + But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. + For this reason, prefer using a table `count` with 256 elements. + @return : count of most numerous element. +*/ +size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + + memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip max) max = count[s]; } + + return (size_t)max; +} + + +/* FSE_count_parallel_wksp() : + * Same as FSE_count_parallel(), but using an externally provided scratch buffer. + * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`. + * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). 
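+ * A hedged usage sketch for the counting entry points above (`src`/`srcSize`
+ * are placeholders; 256 entries cover every possible byte value) :
+ *     unsigned count[256];
+ *     unsigned maxSym = 255;
+ *     size_t const maxFreq = FSE_count_simple(count, &maxSym, src, srcSize);
+ * On return, maxSym is lowered to the largest symbol actually present, and
+ * maxFreq is the frequency of the most numerous element.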
*/ +static size_t FSE_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax, unsigned* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ipmaxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + { U32 s; + if (maxSymbolValue > 255) maxSymbolValue = 255; + for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } } + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* FSE_countFast_wksp() : + * Same as FSE_countFast(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= `1024` unsigned */ +size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned* workSpace) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[1024]; + return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); +} + +/* FSE_count_wksp() : + * Same as FSE_count(), but using an externally provided scratch buffer. 
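+ * (as with FSE_countFast_wksp() above, the requirement below corresponds to
+ *  the four 256-entry counting lanes allocated by FSE_count_parallel_wksp())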
+ * `workSpace` size must be table of >= `1024` unsigned */ +size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, unsigned* workSpace) +{ + if (*maxSymbolValuePtr < 255) + return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); + *maxSymbolValuePtr = 255; + return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); +} + +size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[1024]; + return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); +} + + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ +/*! FSE_sizeof_CTable() : + FSE_CTable is a variable size structure which contains : + `U16 tableLog;` + `U16 maxSymbolValue;` + `U16 nextStateNumber[1 << tableLog];` // This size is variable + `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable +Allocation is manual (C standard does not support variable-size structures). +*/ +size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); +} + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + + +/* Secondary normalization method. + To be used when primary method fails. 
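+   (FSE_normalizeCount() below falls back to this slower path when its fast
+    rounding pass leaves a correction too large to absorb on the most frequent
+    symbol; see the corner-case check there.)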
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + U64 const scale = 62 - tableLog; + U64 const step = ((U64)1<<62) / total; /* <== here, one division ! 
*/ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = -1; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + printf("%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` size must be `(1< not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); + + /* Write table description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + + return op-ostart; +} + +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; +} fseWkspMax_t; + +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/huf_compress.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/huf_compress.c new file mode 100644 index 0000000..83230b4 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/huf_compress.c @@ -0,0 +1,788 @@ +/* ****************************************************************** + Huffman encoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include /* memcpy, memset */ +#include /* printf (debug) */ +#include "bitstream.h" +#include "compiler.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +#include "fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "error_private.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
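+ *  (this is the path HUF_writeCTable() takes below to shrink the table
+ *   description header; callers of the public API never invoke it directly)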
+ */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 +size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + BYTE scratchBuffer[1< not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return op-ostart; +} + + +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt within "huf.h" */ + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) +{ + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; + BYTE* op = (BYTE*)dst; + U32 n; + + /* check conditions */ + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ + for (n=0; n HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits */ + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + U32 n = lastNonNull; + + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n --; + } /* n stops at huffNode[n].nbBits <= maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ + + /* renorm totalCost 
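+       (totalCost was accumulated in units of 1<<(largestBits - nbBits); as
+        noted just below it is a multiple of baseCost, so the shift turns it
+        into a plain count of adjustment units repaid by the loop that follows)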
*/ + totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + int pos; + + /* Get pos of last (smallest) symbol per rank */ + memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = pos; + } } + + while (totalCost > 0) { + U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 highPos = rankLast[nBitsToDecrease]; + U32 lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease ++; + totalCost -= 1 << (nBitsToDecrease-1); + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ + huffNode[rankLast[nBitsToDecrease]].nbBits ++; + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } } /* while (totalCost > 0) */ + + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + rankLast[1] = n+1; + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } } } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + + +typedef struct { + U32 base; + U32 current; +} rankPos; + +static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) +{ + rankPos rank[32]; + U32 n; + + memset(rank, 0, sizeof(rank)); + for (n=0; n<=maxSymbolValue; n++) { + U32 r = BIT_highbit32(count[n] + 1); + rank[r].base ++; + } + for (n=30; n>0; n--) rank[n-1].base += rank[n].base; + for (n=0; n<32; n++) rank[n].current = rank[n].base; + for (n=0; n<=maxSymbolValue; n++) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rank[r].current++; + while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. 
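+ *  A hedged sketch (placeholder arguments; the HUF_buildCTable() wrapper below
+ *  performs the same call with a stack table) :
+ *      U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
+ *      size_t const maxBits = HUF_buildCTable_wksp(tree, count, maxSymbolValue,
+ *                                 maxNbBits, wksp, sizeof(wksp));
+ *      if (HUF_isError(maxBits)) (handle error)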
+ */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; +size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) +{ + nodeElt* const huffNode0 = (nodeElt*)workSpace; + nodeElt* const huffNode = huffNode0+1; + U32 n, nonNullRank; + int lowS, lowN; + U16 nodeNb = STARTNODE; + U32 nodeRoot; + + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue); + + /* init for parents */ + nonNullRank = maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); + + /* fill result into tree (val, nbBits) */ + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine stating value per rank */ + { U16 min = 0; + for (n=maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; n<=maxSymbolValue; n++) + tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ + for (n=0; n<=maxSymbolValue; n++) + tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ + } + + return maxNbBits; +} + +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) +{ + huffNodeTable nodeTable; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); +} + +static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +static int HUF_validateCTable(const HUF_CElt* CTable, const 
unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* 
src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return op-ostart; +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + unsigned singleStream, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = singleStream ? 
+ HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return op-ostart; +} + +typedef struct { + U32 count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + huffNodeTable nodeTable; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t HUF_compress_internal ( + void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + unsigned singleStream, + void* workSpace, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + /* checks & inits */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + table->nodeTable, sizeof(table->nodeTable)) ); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = 
HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + singleStream, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 1 /*single stream*/, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 1 /*single stream*/, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 0 /*4 streams*/, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. 
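The comparison being set up above weighs two complete encodings: keeping the previous Huffman table costs oldSize bytes with no header, while a fresh table costs newSize bytes plus the hSize-byte table description. A one-line sketch of the decision (illustrative names):

    #include <stddef.h>

    /* Illustrative only: reuse the previous CTable when it beats the new
     * table once the header is accounted for, or when the header alone
     * (plus a 12-byte margin) would eat the expected savings. */
    static int huf_should_reuse(size_t oldSize, size_t newSize,
                                size_t hSize, size_t srcSize)
    {
        return (oldSize <= hSize + newSize) || (hSize + 12 >= srcSize);
    }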
+ * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, 0 /* 4 streams */, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress.c new file mode 100644 index 0000000..2aa26da --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress.c @@ -0,0 +1,3449 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************* +* Tuning parameters +***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include <string.h> /* memset */ +#include "cpu.h" +#include "mem.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_compress_internal.h" +#include "zstd_fast.h" +#include "zstd_double_fast.h" +#include "zstd_lazy.h" +#include "zstd_opt.h" +#include "zstd_ldm.h" + + +/*-************************************* +* Helper functions +***************************************/ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictContentSize; + void* workspace; + size_t workspaceSize; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_compressionParameters cParams; + ZSTD_customMem customMem; + U32 dictID; +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_calloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + cctx->customMem = customMem; + cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; + cctx->requestedParams.fParams.contentSizeFlag = 1; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; + }
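ZSTD_createCCtx_advanced above accepts a caller-supplied allocator pair and rejects half-specified ones with the `!customAlloc ^ !customFree` test. A sketch of plugging in a counting allocator, assuming the ZSTD_customMem layout declared in zstd.h (two callbacks followed by an opaque state pointer):

    #include <stdlib.h>

    /* Illustrative only: count allocations made on behalf of the context. */
    static void* counting_alloc(void* opaque, size_t size)
    {
        (*(size_t*)opaque)++;
        return malloc(size);
    }
    static void counting_free(void* opaque, void* address)
    {
        (void)opaque;
        free(address);
    }
    /* usage sketch:
     *   size_t nbAllocs = 0;
     *   ZSTD_customMem const cmem = { counting_alloc, counting_free, &nbAllocs };
     *   ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cmem);
     *   ...
     *   ZSTD_freeCCtx(cctx);
     */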
+} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */ + cctx->staticSize = workspaceSize; + cctx->workSpace = (void*)(cctx+1); + cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL; + assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace; + cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1; + { + void* const ptr = cctx->blockState.nextCBlock + 1; + cctx->entropyWorkspace = (U32*)ptr; + } + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */ + ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; + ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL; +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif + ZSTD_free(cctx, cctx->customMem); + return 0; /* reserved as a potential error code in the future */ +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void) cctx; + return 0; +#endif +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*cctx) + cctx->workSpaceSize + + ZSTD_sizeof_CDict(cctx->cdictLocal) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; + if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; + if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; + if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; + if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength; + if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; + if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; + return cParams; +} + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + memset(&cctxParams, 0, sizeof(cctxParams)); + cctxParams.cParams = cParams; + cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed 
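ZSTD_initStaticCCtx above never allocates: it carves the context and its workspace out of one caller-provided buffer that must be 8-byte aligned and at least ZSTD_estimateCCtxSize() large. A sketch of the intended pairing (illustrative wrapper; malloc is used here only to obtain a suitably aligned arena, a static buffer works too):

    #include <stdlib.h>

    /* Illustrative only: size the arena with the estimate, then carve a
     * static CCtx out of it; ZSTD_initStaticCCtx returns NULL if the
     * arena is too small or misaligned. */
    static ZSTD_CCtx* make_static_cctx(int level, void** arenaOut)
    {
        size_t const need = ZSTD_estimateCCtxSize(level);
        void* const arena = malloc(need);
        if (arena == NULL) return NULL;
        *arenaOut = arena;     /* caller frees after the CCtx is done */
        return ZSTD_initStaticCCtx(arena, need);
    }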
properly defined */ + assert(!ZSTD_checkCParams(cParams)); + cctxParams.fParams.contentSizeFlag = 1; + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_calloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + params->customMem = customMem; + params->compressionLevel = ZSTD_CLEVEL_DEFAULT; + params->fParams.contentSizeFlag = 1; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_free(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + if (!cctxParams) { return ERROR(GENERIC); } + memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + if (!cctxParams) { return ERROR(GENERIC); } + CHECK_F( ZSTD_checkCParams(params.cParams) ); + memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params.cParams; + cctxParams->fParams = params.fParams; + cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); + return 0; +} + +/* ZSTD_assignParamsToCCtxParams() : + * params is presumed valid at this stage */ +static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( + ZSTD_CCtx_params cctxParams, ZSTD_parameters params) +{ + ZSTD_CCtx_params ret = cctxParams; + ret.cParams = params.cParams; + ret.fParams = params.fParams; + ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); + return ret; +} + +#define CLAMPCHECK(val,min,max) { \ + if (((val)<(min)) | ((val)>(max))) { \ + return ERROR(parameter_outOfBound); \ +} } + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_p_compressionLevel: + case ZSTD_p_hashLog: + case ZSTD_p_chainLog: + case ZSTD_p_searchLog: + case ZSTD_p_minMatch: + case ZSTD_p_targetLength: + case ZSTD_p_compressionStrategy: + case ZSTD_p_compressLiterals: + return 1; + + case ZSTD_p_format: + case ZSTD_p_windowLog: + case ZSTD_p_contentSizeFlag: + case ZSTD_p_checksumFlag: + case ZSTD_p_dictIDFlag: + case ZSTD_p_forceMaxWindow : + case ZSTD_p_nbWorkers: + case ZSTD_p_jobSize: + case ZSTD_p_overlapSizeLog: + case ZSTD_p_enableLongDistanceMatching: + case ZSTD_p_ldmHashLog: + case ZSTD_p_ldmMinMatch: + case ZSTD_p_ldmBucketSizeLog: + case ZSTD_p_ldmHashEveryLog: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%u, %u)", (U32)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + return ERROR(stage_wrong); + } } + + switch(param) + { + case ZSTD_p_format : + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case 
ZSTD_p_compressionLevel: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_windowLog: + case ZSTD_p_hashLog: + case ZSTD_p_chainLog: + case ZSTD_p_searchLog: + case ZSTD_p_minMatch: + case ZSTD_p_targetLength: + case ZSTD_p_compressionStrategy: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_compressLiterals: + case ZSTD_p_contentSizeFlag: + case ZSTD_p_checksumFlag: + case ZSTD_p_dictIDFlag: + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize, + * even when referencing into Dictionary content. + * default : 0 when using a CDict, 1 when using a Prefix */ + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_nbWorkers: + if ((value>0) && cctx->staticSize) { + return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ + } + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_jobSize: + case ZSTD_p_overlapSizeLog: + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_enableLongDistanceMatching: + case ZSTD_p_ldmHashLog: + case ZSTD_p_ldmMinMatch: + case ZSTD_p_ldmBucketSizeLog: + case ZSTD_p_ldmHashEveryLog: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + default: return ERROR(parameter_unsupported); + } +} + +size_t ZSTD_CCtxParam_setParameter( + ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned value) +{ + DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%u, %u)", (U32)param, value); + switch(param) + { + case ZSTD_p_format : + if (value > (unsigned)ZSTD_f_zstd1_magicless) + return ERROR(parameter_unsupported); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_p_compressionLevel : { + int cLevel = (int)value; /* cast expected to restore negative sign */ + if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); + if (cLevel) { /* 0 : does not change current level */ + CCtxParams->disableLiteralCompression = (cLevel<0); /* negative levels disable huffman */ + CCtxParams->compressionLevel = cLevel; + } + if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_p_windowLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CCtxParams->cParams.windowLog = value; + return CCtxParams->cParams.windowLog; + + case ZSTD_p_hashLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CCtxParams->cParams.hashLog = value; + return CCtxParams->cParams.hashLog; + + case ZSTD_p_chainLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CCtxParams->cParams.chainLog = value; + return CCtxParams->cParams.chainLog; + + case ZSTD_p_searchLog : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CCtxParams->cParams.searchLog = value; + return value; + + case ZSTD_p_minMatch : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + CCtxParams->cParams.searchLength = value; + return CCtxParams->cParams.searchLength; + + case 
ZSTD_p_targetLength : + /* all values are valid. 0 => use default */ + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_p_compressionStrategy : + if (value>0) /* 0 => use default */ + CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_p_compressLiterals: + CCtxParams->disableLiteralCompression = !value; + return !CCtxParams->disableLiteralCompression; + + case ZSTD_p_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value>0)); + CCtxParams->fParams.contentSizeFlag = value > 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_p_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value > 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value>0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_p_forceMaxWindow : + CCtxParams->forceWindow = (value > 0); + return CCtxParams->forceWindow; + + case ZSTD_p_nbWorkers : +#ifndef ZSTD_MULTITHREAD + if (value>0) return ERROR(parameter_unsupported); + return 0; +#else + return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value); +#endif + + case ZSTD_p_jobSize : +#ifndef ZSTD_MULTITHREAD + return ERROR(parameter_unsupported); +#else + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value); +#endif + + case ZSTD_p_overlapSizeLog : +#ifndef ZSTD_MULTITHREAD + return ERROR(parameter_unsupported); +#else + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value); +#endif + + case ZSTD_p_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value>0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_p_ldmHashLog : + if (value>0) /* 0 ==> auto */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_p_ldmMinMatch : + if (value>0) /* 0 ==> default */ + CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_p_ldmBucketSizeLog : + if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) + return ERROR(parameter_outOfBound); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_p_ldmHashEveryLog : + if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) + return ERROR(parameter_outOfBound); + CCtxParams->ldmParams.hashEveryLog = value; + return CCtxParams->ldmParams.hashEveryLog; + + default: return ERROR(parameter_unsupported); + } +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. 
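The two setters above make up the staged v1.3.x advanced API: parameters accumulate in requestedParams on a fresh context and take effect at the next compression. A usage sketch combining them with this version's generic entry point (ZSTD_compress_generic, declared in zstd.h's experimental section; error handling kept minimal, and ERROR() is the library-internal macro used throughout this file):

    /* Illustrative only: stage parameters, then emit one whole frame. */
    static size_t compress_with_params(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize)
    {
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        size_t ret;
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_checksumFlag, 1);
        ret = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
        if (ZSTD_isError(ret)) return ret;
        if (ret != 0) return ERROR(dstSize_tooSmall); /* frame not finished */
        return out.pos;                               /* compressed size */
    }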
+ */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + if (cctx->cdict) return ERROR(stage_wrong); + + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */ + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */ + if (dict==NULL || dictSize==0) { /* no dictionary mode */ + cctx->cdictLocal = NULL; + cctx->cdict = NULL; + } else { + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); + cctx->cdictLocal = ZSTD_createCDict_advanced( + dict, dictSize, + dictLoadMethod, dictContentType, + cParams, cctx->customMem); + cctx->cdict = cctx->cdictLocal; + if (cctx->cdictLocal == NULL) + return ERROR(memory_allocation); + } + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + cctx->cdict = cdict; + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* exclusive */ + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + cctx->cdict = NULL; /* prefix discards any prior cdict */ + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + return 0; +} + +static void ZSTD_startNewCompression(ZSTD_CCtx* cctx) +{ + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; +} + +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +void ZSTD_CCtx_reset(ZSTD_CCtx* cctx) +{ + ZSTD_startNewCompression(cctx); + cctx->cdict = NULL; +} + +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. 
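The loaders above behave differently: a dictionary installed while the stream is in zcss_init stays attached to the context for subsequent frames until replaced or reset, while a prefix set through ZSTD_CCtx_refPrefix is consumed by the next frame only. A minimal lifecycle sketch (assumes `dictBuf`/`dictLen` hold a prepared dictionary):

    /* Illustrative only: dictionary persists across frames until reset. */
    static void dict_lifecycle(ZSTD_CCtx* cctx, const void* dictBuf, size_t dictLen)
    {
        ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictLen); /* copied internally */
        /* ... compress any number of frames here, all using the dictionary ... */
        ZSTD_CCtx_reset(cctx);  /* back to zcss_init, cdict reference dropped */
    }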
+ @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) + return ERROR(parameter_unsupported); + if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) + return ERROR(parameter_unsupported); + return 0; +} + +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP(val,min,max) { \ + if (val<min) val=min; \ + else if (val>max) val=max; \ + } + CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) cParams.targetLength = ZSTD_TARGETLENGTH_MIN; + if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra; + return cParams; +} + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_adjustCParams_internal() : + optimize `cPar` for a given input (`srcSize` and `dictSize`). + mostly downsizing to reduce memory consumption and initialization latency. + Both `srcSize` and `dictSize` are optional (use 0 if unknown). + Note : cPar is considered validated at this stage. Use ZSTD_checkCParams() to ensure that condition. */ +ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) +{ + static const U64 minSrcSize = 513; /* (1<<9) + 1 */ + static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + if (dictSize && (srcSize+1<2) /* srcSize unknown */ ) + srcSize = minSrcSize; /* presumed small when there is a dictionary */ + else if (srcSize == 0) + srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ?
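The ternary being computed here is the heart of the downsizing rule: the window never needs to exceed the smallest power of two covering the input plus dictionary, with the minimum hash-table size as the floor. A standalone sketch of the arithmetic (illustrative; highbit32 restated as a plain loop):

    /* Illustrative only: windowLog downsizing for small inputs. */
    static unsigned highbit32(unsigned v)  /* position of highest set bit */
    {
        unsigned n = 0;
        while (v >>= 1) n++;
        return n;
    }
    static unsigned adjusted_window_log(unsigned windowLog, unsigned tSize,
                                        unsigned hashLogMin)
    {
        unsigned const hashSizeMin = 1u << hashLogMin;
        unsigned const srcLog = (tSize < hashSizeMin)
                              ? hashLogMin
                              : highbit32(tSize - 1) + 1;
        return (windowLog > srcLog) ? srcLog : windowLog;
    }
    /* e.g. a 100 KB input (tSize = 102400) yields srcLog = 17, so a
     * level-19 windowLog of 27 drops to 17, shrinking the window-derived
     * buffers by a factor of 1024. */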
ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; + { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cycleLog > cPar.windowLog) + cPar.chainLog -= (cycleLog - cPar.windowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + + return cPar; +} + +ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); +} + +static size_t ZSTD_sizeof_matchState(ZSTD_compressionParameters const* cParams, const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32) + + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t)); + size_t const optSpace = (forCCtx && ((cParams->strategy == ZSTD_btopt) || + (cParams->strategy == ZSTD_btultra))) + ? optPotentialSpace + : 0; + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + /* Estimate CCtx size is supported for single-threaded compression only. */ + if (params->nbWorkers > 0) { return ERROR(GENERIC); } + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, 0, 0); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + U32 const divider = (cParams.searchLength==3) ?
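The divider being selected here feeds the sequence-storage budget: a sequence needs at least `divider` bytes of input (3 when searchLength==3, else 4), each stored sequence costs 8 bytes of seqDef plus 3 one-byte code-table entries, and the literals buffer adds one byte per input byte. A sketch of that arithmetic (illustrative):

    #include <stddef.h>

    /* Illustrative only: per-block sequence storage, as estimated above. */
    static size_t token_space(size_t blockSize, unsigned searchLength)
    {
        unsigned const divider = (searchLength == 3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        return blockSize + 11 * maxNbSeq;  /* literals + 11 bytes/sequence */
    }
    /* common case : blockSize = 128 KB, divider = 4
     *   131072 + 11 * 32768 = 491520 bytes of token space */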
3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = blockSize + 11*maxNbSeq; + size_t const entropySpace = HUF_WORKSPACE_SIZE; + size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); + size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq); + + size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + + matchStateSize + ldmSpace + ldmSeqSpace; + + DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); + DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); + return sizeof(ZSTD_CCtx) + neededSpace; + } +} + +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCCtxSize_usingCCtxParams(&params); +} + +static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + return ZSTD_estimateCCtxSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCCtxSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=1; level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCCtxSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + if (params->nbWorkers > 0) { return ERROR(GENERIC); } + { size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog); + size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize; + size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; + size_t const streamingSize = inBuffSize + outBuffSize; + + return CCtxSize + streamingSize; + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCStreamSize_usingCCtxParams(&params); +} + +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCStreamSize(int compressionLevel) { + int level; + size_t memBudget = 0; + for (level=1; level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ?
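The frame progression being assembled here is designed to be polled from outside the compression loop: ingested counts input already buffered, consumed counts input actually compressed, produced counts output bytes. A reporting sketch (illustrative; totalSize is whatever the caller knows about the input):

    #include <stdio.h>

    /* Illustrative only: progress line driven by ZSTD_getFrameProgression. */
    static void print_progress(const ZSTD_CCtx* cctx, unsigned long long totalSize)
    {
        ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
        if (totalSize > 0)
            fprintf(stderr, "\r%3u%% (consumed %llu/%llu, produced %llu)",
                    (unsigned)((fp.consumed * 100) / totalSize),
                    fp.consumed, totalSize, fp.produced);
    }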
0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + return fp; +} } + + +static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + return (cParams1.hashLog == cParams2.hashLog) + & (cParams1.chainLog == cParams2.chainLog) + & (cParams1.strategy == cParams2.strategy) /* opt parser space */ + & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */ +} + +/** The parameters are equivalent if ldm is not enabled in both sets or + * all the parameters are equivalent. */ +static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, + ldmParams_t ldmParams2) +{ + return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || + (ldmParams1.enableLdm == ldmParams2.enableLdm && + ldmParams1.hashLog == ldmParams2.hashLog && + ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && + ldmParams1.minMatchLength == ldmParams2.minMatchLength && + ldmParams1.hashEveryLog == ldmParams2.hashEveryLog); +} + +typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; + +/* ZSTD_sufficientBuff() : + * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . + * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ +static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1, + ZSTD_buffered_policy_e buffPol2, + ZSTD_compressionParameters cParams2, + U64 pledgedSrcSize) +{ + size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); + size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); + size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0; + DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u", + (U32)windowSize2, cParams2.windowLog); + DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u", + (U32)blockSize2, (U32)blockSize1); + return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */ + & (neededBufferSize2 <= bufferSize1); +} + +/** Equivalence for resetCCtx purposes */ +static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, + ZSTD_CCtx_params params2, + size_t buffSize1, size_t blockSize1, + ZSTD_buffered_policy_e buffPol2, + U64 pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); + return ZSTD_equivalentCParams(params1.cParams, params2.cParams) && + ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) && + ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize); +} + +static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.hufCTable_repeatMode = HUF_repeat_none; + bs->entropy.offcode_repeatMode = FSE_repeat_none; + bs->entropy.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). 
+ */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit + 1; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ +} + +/*! ZSTD_continueCCtx() : + * reuse CCtx without reset (note : requires no dictionary) */ +static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place"); + + cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ + cctx->appliedParams = params; + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + cctx->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); + cctx->stage = ZSTDcs_init; + cctx->dictID = 0; + if (params.ldmParams.enableLdm) + ZSTD_window_clear(&cctx->ldmState.window); + ZSTD_referenceExternalSequences(cctx, NULL, 0); + ZSTD_invalidateMatchState(&cctx->blockState.matchState); + ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); + XXH64_reset(&cctx->xxhState, 0); + return 0; +} + +typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; + +static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + + assert(((size_t)ptr & 3) == 0); + + ms->hashLog3 = hashLog3; + memset(&ms->window, 0, sizeof(ms->window)); + ZSTD_invalidateMatchState(ms); + + /* opt parser space */ + if (forCCtx && ((cParams->strategy == ZSTD_btopt) | (cParams->strategy == ZSTD_btultra))) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (U32*)ptr; + ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); + ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1); + ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1); + ptr = ms->opt.offCodeFreq + (MaxOff+1); + ms->opt.matchTable = (ZSTD_match_t*)ptr; + ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1; + ms->opt.priceTable = (ZSTD_optimal_t*)ptr; + ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1; + } + + /* table Space */ + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ + ms->hashTable = (U32*)(ptr); + ms->chainTable = ms->hashTable + hSize; + ms->hashTable3 = ms->chainTable + chainSize; + ptr = ms->hashTable3 + h3Size; + + assert(((size_t)ptr & 3) == 0); + return ptr; +} + +/*!
ZSTD_resetCCtx_internal() : + note : `params` are assumed fully validated at this stage */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params params, U64 pledgedSrcSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + + if (crp == ZSTDcrp_continue) { + if (ZSTD_equivalentParams(zc->appliedParams, params, + zc->inBuffSize, zc->blockSize, + zbuff, pledgedSrcSize)) { + DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)", + zc->appliedParams.cParams.windowLog, (U32)zc->blockSize); + return ZSTD_continueCCtx(zc, params, pledgedSrcSize); + } } + DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); + + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + params.ldmParams.windowLog = params.cParams.windowLog; + ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashEveryLog < 32); + zc->ldmState.hashPower = + ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = blockSize + 11*maxNbSeq; + size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; + size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; + size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + void* ptr; + + /* Check if workSpace is large enough, alloc a new one if needed */ + { size_t const entropySpace = HUF_WORKSPACE_SIZE; + size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); + size_t const bufferSpace = buffInSize + buffOutSize; + size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); + size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq); + + size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace + + ldmSeqSpace + matchStateSize + tokenSpace + + bufferSpace; + DEBUGLOG(4, "Need %uKB workspace, including %uKB for match state, and %uKB for buffers", + (U32)(neededSpace>>10), (U32)(matchStateSize>>10), (U32)(bufferSpace>>10)); + DEBUGLOG(4, "windowSize: %u - blockSize: %u", (U32)windowSize, (U32)blockSize); + + if (zc->workSpaceSize < neededSpace) { /* too small : resize */ + DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK", + (unsigned)(zc->workSpaceSize>>10), + (unsigned)(neededSpace>>10)); + /* static cctx : no resize, error out */ + if (zc->staticSize) return ERROR(memory_allocation); + + zc->workSpaceSize = 0; + ZSTD_free(zc->workSpace, zc->customMem); + zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; + ptr = zc->workSpace; + + /* Statically sized space.
entropyWorkspace never moves (but prev/next block swap places) */ + assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */ + assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t)); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace; + zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1; + ptr = zc->blockState.nextCBlock + 1; + zc->entropyWorkspace = (U32*)ptr; + } } + + /* init params */ + zc->appliedParams = params; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; + + /* ldm hash table */ + /* initialize bucketOffsets table later for pointer alignment */ + if (params.ldmParams.enableLdm) { + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t)); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + zc->ldmState.hashTable = (ldmEntry_t*)ptr; + ptr = zc->ldmState.hashTable + ldmHSize; + zc->ldmSequences = (rawSeq*)ptr; + ptr = zc->ldmSequences + maxNbLdmSeq; + zc->maxNbLdmSequences = maxNbLdmSeq; + + memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); + } + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + + ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1); + + /* sequences storage */ + zc->seqStore.sequencesStart = (seqDef*)ptr; + ptr = zc->seqStore.sequencesStart + maxNbSeq; + zc->seqStore.llCode = (BYTE*) ptr; + zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; + zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; + zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; + ptr = zc->seqStore.litStart + blockSize; + + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + size_t const ldmBucketSize = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + memset(ptr, 0, ldmBucketSize); + zc->ldmState.bucketOffsets = (BYTE*)ptr; + ptr = zc->ldmState.bucketOffsets + ldmBucketSize; + ZSTD_window_clear(&zc->ldmState.window); + } + ZSTD_referenceExternalSequences(zc, NULL, 0); + + /* buffers */ + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ptr; + zc->outBuffSize = buffOutSize; + zc->outBuff = zc->inBuff + buffInSize; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + unsigned windowLog, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + { ZSTD_CCtx_params params = cctx->requestedParams; + /* Copy only compression parameters related to tables.
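Everything in the reset path above is laid out by bumping a single `ptr` cursor through the workspace, with 4-byte alignment asserted at each boundary. A generic sketch of that carving pattern (illustrative names, not part of the imported source):

    #include <assert.h>
    #include <stddef.h>

    /* Illustrative only: the pointer-bump carving used by the reset path. */
    typedef struct { char* cursor; char* end; } arena_t;

    static void* arena_take(arena_t* a, size_t bytes)
    {
        void* const p = a->cursor;
        assert(((size_t)p & 3) == 0);   /* 4-byte aligned, as asserted above */
        a->cursor += bytes;
        assert(a->cursor <= a->end);    /* stays within the computed budget */
        return p;
    }

Because every sub-buffer is taken in a fixed order, the total computed in neededSpace is exact rather than an upper bound, which is what lets the static-CCtx path fail fast instead of reallocating.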
*/ + params.cParams = cdict->cParams; + if (windowLog) params.cParams.windowLog = windowLog; + params.fParams = fParams; + ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_noMemset, zbuff); + assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog); + } + + /* copy tables */ + { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog); + size_t const hSize = (size_t)1 << cdict->cParams.hashLog; + size_t const tableSpace = (chainSize + hSize) * sizeof(U32); + assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); + assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); + memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } + /* Zero the hashTable3, since the cdict never fills it */ + { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; + assert(cdict->matchState.hashLog3 == 0); + memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + /* copy dictionary offsets */ + { + ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + cctx->dictID = cdict->dictID; + + /* copy block state */ + memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong); + + memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. 
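The CDict fast path above copies digested hash and chain tables instead of re-inserting dictionary content on every frame; with the public API that trade is expressed by creating the dictionary once and referencing it per compression. A usage sketch (public zstd API, error handling omitted):

    /* Illustrative only: digest once, reuse for many frames. */
    static size_t compress_many_with_dict(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCap,
                                          const void* src, size_t srcSize,
                                          const ZSTD_CDict* cdict)
    {
        return ZSTD_compress_usingCDict(cctx, dst, dstCap, src, srcSize, cdict);
    }
    /* setup / teardown:
     *   ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);
     *   ... many calls to compress_many_with_dict() ...
     *   ZSTD_freeCDict(cdict);
     */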
*/ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_noMemset, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); + memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } + + /* copy dictionary offsets */ + { + ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + + /* copy block state */ + memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. 
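ZSTD_reduceTable, whose contract is described just above and implemented below, periodically rebases every U32 index in the match tables so they cannot overflow on very long inputs; indexes older than the reducer simply become invalid. The per-cell rule is tiny (illustrative sketch):

    /* Illustrative only: one cell of the index rescaling below. */
    static unsigned reduce_index(unsigned idx, unsigned reducerValue)
    {
        return (idx < reducerValue) ? 0 : idx - reducerValue;
    }

The btlazy2 variant additionally keeps the special "unsorted mark" value intact, which is why preserveMark is a compile-time 0/1 flag rather than a runtime branch.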
+ * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; column<ZSTD_ROWSIZE; column++) { + if (preserveMark) { + U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0; + table[cellNb] += adder; + } + if (table[cellNb] < reducerValue) table[cellNb] = 0; + else table[cellNb] -= reducerValue; + cellNb++; + } } +} + +static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 0); +} + +static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 1); +} + +/*! ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (zc->appliedParams.cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; + if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); + return ZSTD_blockHeaderSize+srcSize; +} + + +static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} + +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + + +static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } + +static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32* workspace, const int
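Raw and RLE literal runs share the 1-3 byte header pattern seen in the two functions above: two bits of block type, then a size format chosen from srcSize (at most 31 fits one byte, at most 4095 two, larger values three). A standalone little-endian encoder for the raw case (illustrative; set_basic is 0 in this file's symbolEncodingType_e):

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative only: raw-literals header, sizes up to 2^20 - 1. */
    static size_t write_raw_literals_header(uint8_t* out, size_t srcSize)
    {
        if (srcSize <= 31) {                /* 2 - 1 - 5 : one byte */
            out[0] = (uint8_t)(srcSize << 3);
            return 1;
        }
        if (srcSize <= 4095) {              /* 2 - 2 - 12 : two bytes */
            uint32_t const h = (1u << 2) + ((uint32_t)srcSize << 4);
            out[0] = (uint8_t)h; out[1] = (uint8_t)(h >> 8);
            return 2;
        }
        {                                   /* 2 - 2 - 20 : three bytes */
            uint32_t const h = (3u << 2) + ((uint32_t)srcSize << 4);
            out[0] = (uint8_t)h; out[1] = (uint8_t)(h >> 8);
            out[2] = (uint8_t)(h >> 16);
            return 3;
        }
    }

Note the original writes the 3-byte case with MEM_writeLE32 and lets the literals that follow overwrite the spare fourth byte; the sketch writes exactly three bytes instead.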
bmi2) +{ + size_t const minGain = ZSTD_minGain(srcSize); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", + disableLiteralCompression); + + /* Prepare nextEntropy assuming reusing the existing table */ + nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode; + memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, + sizeof(prevEntropy->hufCTable)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + { HUF_repeat repeat = prevEntropy->hufCTable_repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2) + : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextEntropy->hufCTable_repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + return lhSize+cLitSize; +} + + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + for (u=0; u<nbSeq; u++) { + U32 const llv = sequences[u].litLength; + U32 const mlv = sequences[u].matchLength; + llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); + ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); + mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); + } + if (seqStorePtr->longLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} +
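When literals are Huffman-compressed, the header must carry both the regenerated size and the compressed size; the 3-byte case built in ZSTD_compressLiterals above packs them as 2+2+10+10 bits. A sketch of that packing (illustrative):

    #include <stdint.h>

    /* Illustrative only: 3-byte compressed-literals header (2-2-10-10):
     * 2 bits type, 2 bits size-format, 10 bits srcSize, 10 bits cLitSize. */
    static void write_lhc3(uint8_t* out, unsigned hType, int fourStreams,
                           uint32_t srcSize, uint32_t cLitSize)
    {
        uint32_t const lhc = hType + ((unsigned)fourStreams << 2)
                           + (srcSize << 4) + (cLitSize << 14);
        out[0] = (uint8_t)lhc;
        out[1] = (uint8_t)(lhc >> 8);
        out[2] = (uint8_t)(lhc >> 16);  /* MEM_writeLE24 equivalent */
    }

In the function above, fourStreams is `!singleStream`, so the same bit doubles as the size-format selector for the 3-byte form.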
+typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +MEM_STATIC +symbolEncodingType_e ZSTD_selectEncodingType( + FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, + U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed) +{ +#define MIN_SEQ_FOR_DYNAMIC_FSE 64 +#define MAX_SEQ_FOR_STATIC_FSE 1000 + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) { + DEBUGLOG(5, "Selected set_rle"); + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + *repeatMode = FSE_repeat_check; + return set_rle; + } + if ( isDefaultAllowed + && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( isDefaultAllowed + && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +MEM_STATIC +size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + U32* count, U32 max, + BYTE const* codeTable, size_t nbSeq, + S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + FSE_CTable const* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + BYTE const* const oend = op + dstCapacity; + + switch (type) { + case set_rle: + *op = codeTable[0]; + CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max)); + return 1; + case set_repeat: + memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return NCountSize; + CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + return NCountSize; + } + } + default: return assert(0), ERROR(GENERIC); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + 
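[Editorial aside, not part of the imported sources: the body that begins at BIT_initCStream below walks the sequence array backwards, seeding the FSE states with sequence nbSeq-1 and then looping from nbSeq-2 down to 0. The bitstream is effectively last-in-first-out: the decoder consumes it in the opposite direction, so pushing the last sequence first lets it regenerate sequence 0 first. A toy model of that reversal follows, with a plain 64-bit accumulator standing in for BIT_CStream_t, which additionally flushes filled bytes to a buffer.]

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t acc = 0;
        uint8_t const syms[3] = {5, 2, 7};   /* three 3-bit "symbols" */
        for (int i = 2; i >= 0; i--)         /* writer pushes in reverse order */
            acc = (acc << 3) | syms[i];
        for (int i = 0; i < 3; i++) {        /* reader pops and sees forward order */
            assert((acc & 7) == syms[i]);
            acc >>= 3;
        }
        return 0;
    }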
CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        sequences[n].litLength,
+                        sequences[n].matchLength + MINMATCH,
+                        sequences[n].offset);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                    /* (7)*/
+    }   }
+
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
+        return streamSize;
+    }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+                                         CTable_MatchLength, mlCodeTable,
+                                         CTable_OffsetBits, ofCodeTable,
+                                         CTable_LitLength, llCodeTable,
+                                         sequences, nbSeq, longOffsets);
+    }
+#endif
+    (void)bmi2;
+    return ZSTD_encodeSequences_default(dst, dstCapacity,
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq, longOffsets);
+}
+
+MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
+                              ZSTD_entropyCTables_t const* prevEntropy,
+                              ZSTD_entropyCTables_t* nextEntropy,
+                              ZSTD_CCtx_params const* cctxParams,
+                              void* dst, size_t dstCapacity, U32* workspace,
+                              const int bmi2)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    U32 count[MaxSeq+1];
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* seqHead;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+
+    /* Compress literals */
+    {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const litSize = seqStorePtr->lit - literals;
+        size_t const cSize = ZSTD_compressLiterals(
+                                    prevEntropy, nextEntropy,
+                                    cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
+                                    op, dstCapacity,
+                                    literals, litSize,
+                                    workspace, bmi2);
+        if (ZSTD_isError(cSize))
+            return cSize;
+        assert(cSize <= dstCapacity);
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/) return ERROR(dstSize_tooSmall);
+    if (nbSeq < 0x7F)
+        *op++ = (BYTE)nbSeq;
+    else if (nbSeq < LONGNBSEQ)
+        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+    else
+        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    if (nbSeq==0) {
+        memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable));
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable));
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        return op - ostart;
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   U32 max = MaxLL;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable,
+                                                       nbSeq, workspace);
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                    count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+                    workspace, HUF_WORKSPACE_SIZE);
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
+    }   }
+    /* build CTable for Offsets */
+    {   U32 max = MaxOff;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+                    workspace, HUF_WORKSPACE_SIZE);
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
+    }   }
+    /* build CTable for MatchLengths */
+    {   U32 max = MaxML;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
+        DEBUGLOG(5, "Building ML table");
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                    count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+                    workspace, HUF_WORKSPACE_SIZE);
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
+    }   }
+
+    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, oend - op,
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
+        op += bitstreamSize;
+    }
+
+    return op - ostart;
+}
+
+MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
+                        ZSTD_entropyCTables_t const* prevEntropy,
+                        ZSTD_entropyCTables_t* nextEntropy,
+                        ZSTD_CCtx_params const* cctxParams,
+                        void* dst, size_t dstCapacity,
+                        size_t srcSize, U32* workspace, int bmi2)
+{
+    size_t const cSize = ZSTD_compressSequences_internal(
+            seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
+            workspace, bmi2);
+    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+     * Since we ran out of space, the block must not be compressible, so fall back to a raw uncompressed block.
+ */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + if (ZSTD_isError(cSize)) return cSize; + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */ + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (nextEntropy->offcode_repeatMode == FSE_repeat_valid) + nextEntropy->offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +{ + static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict } + }; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert((U32)strat >= (U32)ZSTD_fast); + assert((U32)strat <= (U32)ZSTD_btultra); + return blockCompressor[extDict!=0][(U32)strat]; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +static void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength); + return 0; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); + if (current > ms->nextToUpdate + 384) + ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); + } + + /* select and store sequences */ + { U32 const extDict = ZSTD_window_hasExtDict(ms->window); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + 
ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + &zc->appliedParams.cParams, + src, srcSize, extDict); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize)); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + &zc->appliedParams.cParams, + src, srcSize, extDict); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + + /* encode sequences and literals */ + { size_t const cSize = ZSTD_compressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, zc->entropyWorkspace, zc->bmi2); + if (ZSTD_isError(cSize) || cSize == 0) return cSize; + /* confirm repcodes and entropy tables */ + { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; + } + return cSize; + } +} + + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
+* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + assert(cctx->appliedParams.cParams.windowLog <= 31); + + DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (U32)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) + return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + if (remaining < blockSize) blockSize = remaining; + + if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { + U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + + ZSTD_reduceIndex(cctx, correction); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + ms->loadedDictEnd = 0; + } + ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd); + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + + if (cSize == 0) { /* block is not compressible */ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); + if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); /* 4th byte will be overwritten */ + memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); + cSize = ZSTD_blockHeaderSize + blockSize; + } else { + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader24); + cSize += ZSTD_blockHeaderSize; + } + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (U32)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return op-ostart; +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 
0 : dictIDSizeCodeLength;   /* 0-3 */
+    U32 const checksumFlag = params.fParams.checksumFlag>0;
+    U32 const windowSize = (U32)1 << params.cParams.windowLog;
+    U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+    BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+    U32 const fcsCode = params.fParams.contentSizeFlag ?
+                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
+    BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
+    size_t pos=0;
+
+    if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
+    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
+
+    if (params.format == ZSTD_f_zstd1) {
+        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+        pos = 4;
+    }
+    op[pos++] = frameHeaderDescriptionByte;
+    if (!singleSegment) op[pos++] = windowLogByte;
+    switch(dictIDSizeCode)
+    {
+        default:  assert(0); /* impossible */
+        case 0 : break;
+        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
+        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
+        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
+    }
+    switch(fcsCode)
+    {
+        default:  assert(0); /* impossible */
+        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
+        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
+        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
+        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
+    }
+    return pos;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall);
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    if (cctx->stage != ZSTDcs_init)
+        return ERROR(stage_wrong);
+    if (cctx->appliedParams.ldmParams.enableLdm)
+        return ERROR(parameter_unsupported);
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    return 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (U32)srcSize);
+    if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong);   /* missing init (ZSTD_compressBegin) */
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        if (ZSTD_isError(fhSize)) return fhSize;
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm)
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                         ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                         ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
+        if (ZSTD_isError(cSize)) return cSize;
+        cctx->consumedSrcSize += srcSize;
+        cctx->producedCSize += (cSize + fhSize);
+        if (cctx->appliedParams.fParams.contentSizeFlag) {  /* control src size */
+            if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
+                DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
+                    (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
+                return ERROR(srcSize_wrong);
+            }
+        }
+        return cSize + fhSize;
+    }
+}
+
+size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (U32)srcSize);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
+}
+
+
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+{
+    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+    assert(!ZSTD_checkCParams(cParams));
+    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
+}
+
+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+    if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
+}
+
+/*! ZSTD_loadDictionaryContent() :
+ *  @return : 0, or an error code
+ */
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize)
+{
+    const BYTE* const ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    ZSTD_compressionParameters const* cParams = &params->cParams;
+
+    ZSTD_window_update(&ms->window, src, srcSize);
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, cParams, iend);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, cParams, iend);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_insertAndFindFirstIndex(ms, cParams, iend-HASH_READ_SIZE);
+        break;
+
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_updateTree(ms, cParams, iend-HASH_READ_SIZE, iend);
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up cause problems
+   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
+   that we may encounter during compression.
+   NOTE: This behavior is not standard and could be improved in the future. */
+static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
+    }
+    return 0;
+}
+
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : dictID, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed > 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    size_t dictID;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize > 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictPtr += 4;   /* skip magic number */
+    dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
+    dictPtr += 4;
+
+    {   unsigned maxSymbolValue = 255;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
+        if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
+        if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
+        dictPtr += hufHeaderSize;
+    }
+
+    {   unsigned offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
+                 dictionary_corrupted);
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        /* Every match length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
+        CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
+                 dictionary_corrupted);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        /* Every literal length code must have non-zero probability */
+        CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
+        CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
+                 dictionary_corrupted);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+    bs->rep[0] = MEM_readLE32(dictPtr+0);
+    bs->rep[1] = MEM_readLE32(dictPtr+4);
+    bs->rep[2] = MEM_readLE32(dictPtr+8);
+    dictPtr += 12;
+
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+        }
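[Editorial aside, not part of the imported sources: the `offcodeMax` computed just above is the largest offset *code* the dictionary must be able to encode, and an offset's code is floor(log2(offset)), which is exactly what `ZSTD_highbit32` returns. A self-contained re-run of that arithmetic for a hypothetical 100 KB dictionary follows; the `highbit32` helper here is a portable stand-in, not the library's implementation.]

    #include <stdint.h>
    #include <stdio.h>

    static unsigned highbit32(uint32_t v)  /* position of the highest set bit */
    {
        unsigned r = 0;
        while (v >>= 1) r++;
        return r;
    }

    int main(void)
    {
        uint32_t const dictContentSize = 100 * 1024;
        /* mirror the code above: largest offset that must be supported */
        uint32_t const maxOffset = dictContentSize + 128 * 1024;
        printf("offcodeMax = %u\n", highbit32(maxOffset));  /* prints 17 */
        return 0;
    }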
+ /* All offset values <= dictContentSize + 128 KB must be representable */ + CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); + /* All repCodes must be <= dictContentSize and != 0*/ + { U32 u; + for (u=0; u<3; u++) { + if (bs->rep[u] == 0) return ERROR(dictionary_corrupted); + if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); + } } + + bs->entropy.hufCTable_repeatMode = HUF_repeat_valid; + bs->entropy.offcode_repeatMode = FSE_repeat_valid; + bs->entropy.matchlength_repeatMode = FSE_repeat_valid; + bs->entropy.litlength_repeatMode = FSE_repeat_valid; + CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize)); + return dictID; + } +} + +/** ZSTD_compress_insertDictionary() : +* @return : dictID, or an error code */ +static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + void* workspace) +{ + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); + if ((dict==NULL) || (dictSize<=8)) return 0; + + ZSTD_reset_compressedBlockState(bs); + + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, params, dict, dictSize); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content dictionary detected"); + return ZSTD_loadDictionaryContent(ms, params, dict, dictSize); + } + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_wrong); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace); +} + +/*! 
ZSTD_compressBegin_internal() : + * @return : 0, or an error code */ +size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + if (cdict && cdict->dictContentSize>0) { + cctx->requestedParams = params; + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params.cParams.windowLog, + params.fParams, pledgedSrcSize, zbuff); + } + + CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_continue, zbuff) ); + { + size_t const dictID = ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + ¶ms, dict, dictSize, dictContentType, cctx->entropyWorkspace); + if (ZSTD_isError(dictID)) return dictID; + assert(dictID <= (size_t)(U32)-1); + cctx->dictID = (U32)dictID; + } + return 0; +} + +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); + /* compression parameters verification and optimization */ + CHECK_F( ZSTD_checkCParams(params.cParams) ); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, + NULL /*cdict*/, + cctxParams, pledgedSrcSize); +} + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL, + cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); +} + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. 
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */ + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + if (ZSTD_isError(cSize)) return cSize; + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + if (ZSTD_isError(endResult)) return endResult; + if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */ + DEBUGLOG(4, "end of frame : controlling src size"); + if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) { + DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u", + (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize); + return ERROR(srcSize_wrong); + } } + return cSize + endResult; +} + + +static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compress_internal"); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + cctxParams); +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced"); + CHECK_F(ZSTD_checkCParams(params.cParams)); + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_CCtx_params params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", + (U32)srcSize); + CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL, + params, srcSize, ZSTDb_not_buffered) ); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t 
dstCapacity, const void* src, size_t srcSize, + const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); +} + +size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +{ + size_t result; + ZSTD_CCtx ctxBody; + memset(&ctxBody, 0, sizeof(ctxBody)); + ctxBody.customMem = ZSTD_defaultCMem; + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem); /* can't free ctxBody itself, as it's on stack; free only heap content */ + return result; +} + + +/* ===== Dictionary API ===== */ + +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict)); + return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict)); + return cdict->workspaceSize + (cdict->dictBuffer ? 
cdict->dictContentSize : 0) + sizeof(*cdict); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType); + assert(!ZSTD_checkCParams(cParams)); + cdict->cParams = cParams; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictBuffer = NULL; + cdict->dictContent = dictBuffer; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); + cdict->dictBuffer = internalBuffer; + cdict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + { void* const end = ZSTD_reset_matchState( + &cdict->matchState, + (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, + &cParams, ZSTDcrp_continue, /* forCCtx */ 0); + assert(end == (char*)cdict->workspace + cdict->workspaceSize); + (void)end; + } + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is <= 8 bytes. + */ + { ZSTD_CCtx_params params; + memset(¶ms, 0, sizeof(params)); + params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + params.cParams = cParams; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, ¶ms, + cdict->dictContent, cdict->dictContentSize, + dictContentType, cdict->workspace); + if (ZSTD_isError(dictID)) return dictID; + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) +{ + DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); + size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + void* const workspace = ZSTD_malloc(workspaceSize, customMem); + + if (!cdict || !workspace) { + ZSTD_free(cdict, customMem); + ZSTD_free(workspace, customMem); + return NULL; + } + cdict->customMem = customMem; + cdict->workspace = workspace; + cdict->workspaceSize = workspaceSize; + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + cParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support 
free on NULL */
+    {   ZSTD_customMem const cMem = cdict->customMem;
+        ZSTD_free(cdict->workspace, cMem);
+        ZSTD_free(cdict->dictBuffer, cMem);
+        ZSTD_free(cdict, cMem);
+        return 0;
+    }
+}
+
+/*! ZSTD_initStaticCDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             The provided pointer must be 8-byte aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevant cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+const ZSTD_CDict* ZSTD_initStaticCDict(
+                                 void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 ZSTD_dictLoadMethod_e dictLoadMethod,
+                                 ZSTD_dictContentType_e dictContentType,
+                                 ZSTD_compressionParameters cParams)
+{
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
+                            + HUF_WORKSPACE_SIZE + matchStateSize;
+    ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
+    void* ptr;
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    if (dictLoadMethod == ZSTD_dlm_byCopy) {
+        memcpy(cdict+1, dict, dictSize);
+        dict = cdict+1;
+        ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
+    } else {
+        ptr = cdict+1;
+    }
+    cdict->workspace = ptr;
+    cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize;
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                              dict, dictSize,
+                                              ZSTD_dlm_byRef, dictContentType,
+                                              cParams) ))
+        return NULL;
+
+    return cdict;
+}
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+    assert(cdict != NULL);
+    return cdict->cParams;
+}
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    if (cdict==NULL) return ERROR(dictionary_wrong);
+    {   ZSTD_CCtx_params params = cctx->requestedParams;
+        params.cParams = ZSTD_getCParamsFromCDict(cdict);
+        /* Increase window log to fit the entire dictionary and source if the
+         * source size is known. Limit the increase to 19, which is the
+         * window log for compression level 1 with the largest source size.
+         */
+        if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+            U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+            U32 const limitedSrcLog = limitedSrcSize > 1 ?
ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); + } + params.fParams = fParams; + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); + } +} + +/* ZSTD_compressBegin_usingCDict() : + * pledgedSrcSize=0 means "unknown" + * if pledgedSrcSize>0, it will enable contentSizeFlag */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0); +} + +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, + const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, + const ZSTD_CDict* const cdict, + ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)", + params.disableLiteralCompression); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + CHECK_F( ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, + cdict, + params, pledgedSrcSize, + ZSTDb_buffered) ); + + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + cctx->inBuffTarget = cctx->blockSize + + (cctx->blockSize 
== pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + return 0; /* ready to go */ +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params params = zcs->requestedParams; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + params.fParams.contentSizeFlag = 1; + params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, 0); + return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. + * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + + if (dict && dictSize >= 8) { + DEBUGLOG(4, "loading dictionary of size %u", (U32)dictSize); + if (zcs->staticSize) { /* static CCtx : never uses malloc */ + /* incompatible with internal cdict creation */ + return ERROR(memory_allocation); + } + ZSTD_freeCDict(zcs->cdictLocal); + zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + params.cParams, zcs->customMem); + zcs->cdict = zcs->cdictLocal; + if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); + } else { + if (cdict) { + params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict; it includes windowLog */ + } + ZSTD_freeCDict(zcs->cdictLocal); + zcs->cdictLocal = NULL; + zcs->cdict = cdict; + } + + return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ + { ZSTD_CCtx_params params = zcs->requestedParams; + params.cParams = ZSTD_getCParamsFromCDict(cdict); + params.fParams = fParams; + return ZSTD_initCStream_internal(zcs, + NULL, 0, cdict, + params, pledgedSrcSize); + } +} + +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. 
+ * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", + (U32)pledgedSrcSize, params.fParams.contentSizeFlag); + CHECK_F( ZSTD_checkCParams(params.cParams) ); + if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */ + { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize); + } +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); + ZSTD_CCtx_params const cctxParams = + ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); + ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, cctxParams, pledgedSrcSize); +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + return ZSTD_initCStream_srcSize(zcs, compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN); +} + +/*====== Compression ======*/ + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length) memcpy(dst, src, length); + return length; +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants and *compress_generic() + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = istart + input->size; + const char* ip = istart + input->pos; + char* const ostart = (char*)output->dst; + char* const oend = ostart + output->size; + char* op = ostart + output->pos; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode); + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + /* call ZSTD_initCStream() first ! 
*/ + return ERROR(init_missing); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize); + if (ZSTD_isError(cSize)) return cSize; + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_startNewCompression(zcs); + someMoreWork = 0; break; + } + /* complete loading into inBuffer */ + { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { void* cDst; + size_t cSize; + size_t const iSize = zcs->inBuffPos - zcs->inToCompress; + size_t oSize = oend-op; + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + if (oSize >= ZSTD_compressBound(iSize)) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + if (ZSTD_isError(cSize)) return cSize; + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_startNewCompression(zcs); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, oend-op, + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (U32)toFlush, (U32)(oend-op), (U32)flushed); + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_startNewCompression(zcs); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + { size_t hintInSize = 
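/* editor's note, not part of the upstream sources: the value assigned here is the input-size hint returned to the caller — the number of bytes still missing to fill the current block target; e.g. with a 128 KB block of which 100 KB is already buffered, the hint is 28 KB, and a full blockSize is suggested once the target was exactly met. */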
zcs->inBuffTarget - zcs->inBuffPos; + if (hintInSize==0) hintInSize = zcs->blockSize; + return hintInSize; + } +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + /* check conditions */ + if (output->pos > output->size) return ERROR(GENERIC); + if (input->pos > input->size) return ERROR(GENERIC); + + return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue); +} + + +size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compress_generic, endOp=%u ", (U32)endOp); + /* check conditions */ + if (output->pos > output->size) return ERROR(GENERIC); + if (input->pos > input->size) return ERROR(GENERIC); + assert(cctx!=NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ + params.cParams = ZSTD_getCParamsFromCCtxParams( + &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { + /* mt context creation */ + if (cctx->mtctx == NULL || (params.nbWorkers != ZSTDMT_getNbWorkers(cctx->mtctx))) { + DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + if (cctx->mtctx != NULL) + DEBUGLOG(4, "ZSTD_compress_generic: previous nbWorkers was %u", + ZSTDMT_getNbWorkers(cctx->mtctx)); + ZSTDMT_freeCCtx(cctx->mtctx); + cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); + if (cctx->mtctx == NULL) return ERROR(memory_allocation); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + CHECK_F( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); + cctx->streamStage = zcss_load; + cctx->appliedParams.nbWorkers = params.nbWorkers; + } else +#endif + { CHECK_F( ZSTD_resetCStream_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, + params, cctx->pledgedSrcSizePlusOne-1) ); + assert(cctx->streamStage == zcss_load); + assert(cctx->appliedParams.nbWorkers == 0); + } } + + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + ZSTD_startNewCompression(cctx); + } + return flushMin; + } } +#endif + CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) ); + DEBUGLOG(5, "completed ZSTD_compress_generic"); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t 
ZSTD_compress_generic_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + + +/*====== Finalize ======*/ + +/*! ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + if (output->pos > output->size) return ERROR(GENERIC); + CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) ); + return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + if (output->pos > output->size) return ERROR(GENERIC); + CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) ); + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; + size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (U32)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - guarantees a monotonically increasing memory budget */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 1, ZSTD_fast }, /* level 1 */ + { 19, 15, 16, 1, 6, 1, ZSTD_fast }, /* level 2 */ + { 20, 16, 17, 1, 5, 8, ZSTD_dfast }, /* level 3 */ + { 20, 17, 18, 1, 5, 8, ZSTD_dfast }, /* level 4 */ + { 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */ + { 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */ + { 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */ + { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */ + { 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */ + { 23, 23, 22, 7, 3,128, ZSTD_btopt }, /* level 19 */ + { 25, 25, 23, 7, 3,128, ZSTD_btultra }, /* level 20 */ + { 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */ + { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */ + { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */ + { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */ + { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/ + { 18, 16, 17, 4, 5, 
8, ZSTD_greedy }, /* level 5.*/ + { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/ + { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */ + { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/ + { 18, 18, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 19, 17, 7, 4, 8, ZSTD_btlazy2 }, /* level 13 */ + { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ + { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ + { 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/ + { 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/ + { 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* level 0 - not used */ + { 17, 12, 13, 1, 6, 1, ZSTD_fast }, /* level 1 */ + { 17, 13, 16, 1, 5, 1, ZSTD_fast }, /* level 2 */ + { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */ + { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */ + { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */ + { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */ + { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */ + { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/ + { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/ + { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ + { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/ + { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/ + { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */ + { 14, 14, 14, 1, 4, 1, ZSTD_fast }, /* level 2 */ + { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/ + { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/ + { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/ + { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* 
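(editor's note, not part of the upstream sources: in every row the columns are windowLog, chainLog, hashLog, searchLog, searchLength, targetLength and strategy — e.g. this table's level 1 row { 14, 14, 14, 1, 6, 1, ZSTD_fast } caps the window at 2^14 = 16 KB to suit the small-input bucket.)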
level 21.*/ + { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/ +}, +}; + +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. +* Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + size_t const addedSize = srcSizeHint ? 0 : 500; + U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1; + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ + int row = compressionLevel; + DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } + +} + +/*! ZSTD_getParams() : +* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). +* All fields of `ZSTD_frameParameters` are set to default (0) */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + memset(&params, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress_internal.h b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress_internal.h new file mode 100644 index 0000000..81f12ca --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_compress_internal.h @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "zstd_internal.h" +#ifdef ZSTD_MULTITHREAD +# include "zstdmt_compress.h" +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + U32 hufCTable[HUF_CTABLE_SIZE_U32(255)]; + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + HUF_repeat hufCTable_repeatMode; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; + U32 len; +} ZSTD_match_t; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + U32* litFreq; /* table of literals statistics, of size 256 */ + U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + /* begin updated by ZSTD_setLog2Prices */ + U32 log2litSum; /* pow2 to compare log2(litfreq) to */ + U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */ + U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */ + U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */ + /* end : updated by ZSTD_setLog2Prices */ + U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */ +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more data */ +} ZSTD_window_t; + +typedef struct { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 nextToUpdate3; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + optState_t opt; /* optimal parser state */ +} ZSTD_matchState_t; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { +
ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + U64 hashPower; /* Used to compute the rolling hash. + * Depends on ldmParams.minMatchLength */ +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashEveryLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + U32 offset; + U32 litLength; + U32 matchLength; +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The position where reading stopped. <= size. */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity of the `seq` pointer */ +} rawSeqStore_t; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int disableLiteralCompression; + int forceWindow; /* force back-references to respect limit of + * 1<<wLog, even for dictionary */ + + /* Multithreading: used to pass parameters to mtctx */ + unsigned nbWorkers; + unsigned jobSize; + unsigned overlapSizeLog; + + /* Long distance matching parameters */ + ldmParams_t ldmParams; + + ZSTD_customMem customMem; +}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ + +struct ZSTD_CCtx_s { + ZSTD_compressionStage_e stage; + int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime */ + ZSTD_CCtx_params requestedParams; + ZSTD_CCtx_params appliedParams; + U32 dictID; + void* workSpace; + size_t workSpaceSize; + size_t blockSize; + unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ + unsigned long long consumedSrcSize; + unsigned long long producedCSize; + XXH64_state_t xxhState; + ZSTD_customMem customMem; + size_t staticSize; + + seqStore_t seqStore; /* sequences storage ptrs */ + ldmState_t ldmState; /* long distance matching state */ + rawSeq* ldmSequences; /* Storage for the ldm output sequences */ + size_t maxNbLdmSequences; + rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ + ZSTD_blockState_t blockState; + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + + /* streaming */ + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZSTD_cStreamStage streamStage; + U32 frameEnded; + + /* Dictionary */ + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; + ZSTD_prefixDict prefixDict; /* single-usage dictionary */ + + /* Multi-threading */ +#ifdef ZSTD_MULTITHREAD + ZSTDMT_CCtx* mtctx; +#endif +}; + +typedef size_t (*ZSTD_blockCompressor) ( + ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict); + + +MEM_STATIC U32 ZSTD_LLcode(U32 litLength) +{ + static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + static const U32 LL_deltaCode = 19; + return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +/*! ZSTD_storeSeq() : + * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + * `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
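 * (editor's note, not part of the upstream sources: the repeat offsets are selected by passing the small codes 0-2, stored below as 1-3 after the +1, while a real distance D arrives already shifted by ZSTD_REP_MOVE and is stored as D+3, so the two ranges cannot collide; e.g. the fast block compressors pass 0 for "repeat the most recent offset".)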
+ * `mlBase` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) +{ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); + } +#endif + /* copy Literals */ + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB); + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offsetCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + 
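/* editor's note, not part of the upstream sources: whichever branch is compiled, ZSTD_NbCommonBytes(diff) reports how many of the first compared bytes were identical, given diff = MEM_readST(a) ^ MEM_readST(b); e.g. on a little-endian machine diff == 0x0000000000FF0000 has 16 trailing zero bits, and 16 >> 3 == 2 matching leading bytes. */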
unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; + return (size_t)(pIn - pStart); +} + +/** ZSTD_count_2segments() : + * can count match length with `ip` & `match` in 2 different segments. + * convention : on reaching mEnd, match count continue starting from iStart + */ +MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) +{ + const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd); + size_t const matchLength = ZSTD_count(ip, match, vEnd); + if (match + matchLength != mEnd) return matchLength; + return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); +} + + +/*-************************************* +* Hashes +***************************************/ +static const U32 prime3bytes = 506832829U; +static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +/*-************************************* +* Round buffer management +***************************************/ +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. + */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict.
+ */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + void const* srcEnd) +{ + U32 const current = (U32)((BYTE const*)srcEnd - window.base); + return current > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + * NOTE: (maxDist & cycleMask) must be zero. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog + * correction > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog) + * > (3<<29) - (1<<chainLog) + * > (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<<chainLog + 1<<windowLog). + * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t. + * In 32-bit mode we are safe, because (chainLog <= 29), so + * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32. + * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32: + * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32. + */ + U32 const cycleMask = (1U << cycleLog) - 1; + U32 const current = (U32)((BYTE const*)src - window->base); + U32 const newCurrent = (current & cycleMask) + maxDist; + U32 const correction = current - newCurrent; + assert((maxDist & cycleMask) == 0); + assert(current > newCurrent); + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + + window->base += correction; + window->dictBase += correction; + window->lowLimit -= correction; + window->dictLimit -= correction; + + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} + +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * This allows a simple check that index >= lowLimit to see if index is valid. + * This must be called before a block compression call, with srcEnd as the block + * source end. + * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. + * This is because dictionaries are allowed to be referenced as long as the last + * byte of the dictionary is in the window, but once they are out of range, + * they cannot be referenced. If loadedDictEndPtr is NULL, we use + * loadedDictEnd == 0. + */ +MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + void const* srcEnd, U32 maxDist, + U32* loadedDictEndPtr) +{ + U32 const current = (U32)((BYTE const*)srcEnd - window->base); + U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0; + if (current > maxDist + loadedDictEnd) { + U32 const newLowLimit = current - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit, + window->lowLimit); + window->dictLimit = window->lowLimit; + } + if (loadedDictEndPtr) + *loadedDictEndPtr = 0; + } +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the old extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous.
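 * (editor's note, not part of the upstream sources: e.g. when a new, non-adjacent buffer is appended, the old prefix keeps its indices — dictBase takes over the old base and dictLimit marks the old end — while base is rebased so the new ip continues the same index space; candidates below dictLimit are then read through dictBase.)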
+ */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + /* Check if blocks follow each other */ + if (src != window->nextSrc) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", + window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + // ms->nextToUpdate = window->dictLimit; + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + } + return contiguous; +} + +#if defined (__cplusplus) +} +#endif + + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +/*! ZSTD_compressStream_generic() : + * Private use only. To be called from zstdmt_compress.c in single-thread mode. */ +size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. 
*/ +size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_CCtx_params params); + + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) + */ +size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); + +/* ZSTD_referenceExternalSequences() : + * Must be called before starting a compression operation. + * seqs must parse a prefix of the source. + * This cannot be used when long range matching is enabled. + */ +size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); + +#endif /* ZSTD_COMPRESS_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.c new file mode 100644 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.c +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_double_fast.h" + + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end) +{ + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const current = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = current + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = current + i; + } + } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, + U32 const mls /* template */) +{ + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ms->window.dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 const matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + + assert(offset_1 <= current); /* supposed guaranteed by construction */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + /* favor repcode */ + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + U32 offset; + if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + } else if (
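/* editor's note, not part of the upstream sources: this branch is reached only after the 8-byte long-hash probe failed; the 4-byte short-hash candidate tested in the condition below is accepted only after one extra long-hash probe at ip+1 has had a chance to trade it for a longer match. */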
(matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { + size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = current + 1; + if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } + + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
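/* editor's note, not part of the upstream sources: offsetSaved holds a repeat offset that exceeded the valid window at block start and was zeroed for the search; restoring it here when it went unused keeps the decoder's repcode history in sync. */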
offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + const U32 mls = cParams->searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, + U32 const mls /* template */) +{ + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ms->window.lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? 
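/* editor's note, not part of the upstream sources: the backward "catch up" loops below may only extend a match down to the start of the segment the candidate lives in — dictStart for an extDict candidate, lowPrefixPtr for a prefix candidate. */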
dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8; + offset = current - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = current + 1; + if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8; + ip++; + offset = current+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; + offset = current - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? 
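/* editor's note, not part of the upstream sources: a repeat match starting in the extDict may run past its end; ZSTD_count_2segments() then continues counting from lowPrefixPtr once repEnd2 (here dictEnd) is reached, per the convention documented in zstd_compress_internal.h. */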
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const mls = cParams->searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 7); + } +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.h b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.h new file mode 100644 index 0000000..6d80b27 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_double_fast.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "mem.h" /* U32 */ +#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.c new file mode 100644 index 0000000..df4d28b --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.c @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "zstd_compress_internal.h" +#include "zstd_fast.h" + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end) +{ + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->searchLength; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const current = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls); + if (i == 0 || hashTable[hash] == 0) + hashTable[hash] = current + i; + } + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const hlog, U32 const stepSize, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ms->window.dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + hashTable[h] = current; /* update hash table */ + + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ( (matchIndex <= lowestIndex) + || (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + { U32 const offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); + 
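/* editor's note, not part of the upstream sources: the call below stores a pure repeat-offset match — zero literals (anchor == ip here) and offsetCode 0, i.e. "repeat offset_1", which the swap two statements up just set to the offset actually matched. */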
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const hlog = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const hlog, U32 const stepSize, U32 const mls) +{ + U32* hashTable = ms->hashTable; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ms->window.lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashTable[h] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ( (matchIndex < lowestIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? 
dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const hlog = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); + } +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.h b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.h new file mode 100644 index 0000000..f0438ad --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_fast.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "mem.h" /* U32 */ +#include "zstd_compress_internal.h" + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.c new file mode 100644 index 0000000..9f15812 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.c @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_lazy.h" + + +/*-************************************* +* Binary Tree search +***************************************/ + +void ZSTD_updateDUBT( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : current >= btlow == (current - btmask) + * doesn't fail */ +static void ZSTD_insertDUBT1( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + U32 current, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, int extDict) +{ + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (current>=dictLimit) ? 
base + current : dictBase + current; + const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + current, dictLimit, windowLow); + assert(current >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); + + if ( (!extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (current < dictLimit) /* both in extDict */) { + const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (current < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + current, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t ZSTD_DUBT_findBestMatch ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + U32 const extDict) +{ + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const windowLow = ms->window.lowLimit; + + 
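+ /* note : each indexed position owns two U32 cells in bt : cell 2*(idx&btMask) links to the smaller subtree, cell 2*(idx&btMask)+1 to the larger one, or holds ZSTD_DUBT_UNSORTED_MARK while the position is still unsorted */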
U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= current) ? 0 : current - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); + assert(ip <= iend-8); /* required for h calculation */ + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */ + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, cParams, matchIndex, iend, + nbCandidates, unsortLimit, extDict); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8+1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < 
ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + current, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4); + case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6); + } +} + + +/** Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch_extDict ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4); + case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5); + case 7 : + case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip) +{ + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength); +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const U32 extDict) +{ + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = ms->window.lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? current - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((!extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, 
ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iLimit, + size_t* const offsetPtr) +{ + switch(cParams->searchLength) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base + ms->window.dictLimit; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + /* init */ + ip += (ip==base); + ms->nextToUpdate3 = ms->nextToUpdate; + { U32 const maxRep = (U32)(ip-base); + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { + /* repcode : we take it */ + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + if ( (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip<ilimit)) { + ip ++; + if ( (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(ml2 * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((ml2 >= 4) && (gain2 > gain1)) + matchLength = ml2, offset = 0, start = ip; + } + {
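+ /* search match, depth 2 */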
size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? offset_2 : savedOffset; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const U32 lowestIndex = ms->window.lowLimit; + const BYTE* const prefixStart = base + dictLimit; 
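+ /* window layout : indexes in [lowestIndex, dictLimit) read from dictBase (the old segment), indexes >= dictLimit read from base (the current prefix) */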
+ const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + /* init */ + ms->nextToUpdate3 = ms->nextToUpdate; + ip += (ip == prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + current++; + /* check repCode */ + if (offset) { + const U32 repIndex = (U32)(current - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip<ilimit)) { + ip ++; + current++; + /* check repCode */ + if (offset) { + const U32 repIndex = (U32)(current - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ?
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - offset_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2); +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.h b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.h new file mode 100644 index 0000000..bda064f --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_lazy.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" + +U32 ZSTD_insertAndFindFirstIndex( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). 
pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.c new file mode 100644 index 0000000..bffd8a3 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.c @@ -0,0 +1,653 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "zstd_ldm.h" + +#include "zstd_fast.h" /* ZSTD_fillHashTable() */ +#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 +#define LDM_HASH_CHAR_OFFSET 10 + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + U32 const windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (cParams->strategy >= ZSTD_btopt) { + /* Get out of the way of the optimal parser */ + U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); + assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); + assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); + params->minMatchLength = minMatch; + } + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashEveryLog == 0) { + params->hashEveryLog = + windowLog < params->hashLog ? 
0 : windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = + ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t); + return params.enableLdm ? totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getSmallHash() : + * numBits should be <= 32 + * If numBits==0, returns 0. + * @return : the most significant numBits of value. */ +static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) +{ + assert(numBits <= 32); + return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); +} + +/** ZSTD_ldm_getChecksum() : + * numBitsToDiscard should be <= 32 + * @return : the next most significant 32 bits after numBitsToDiscard */ +static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) +{ + assert(numBitsToDiscard <= 32); + return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; +} + +/** ZSTD_ldm_getTag() : + * Given the hash, returns the most significant numTagBits bits + * after (32 + hbits) bits. + * + * If there are not enough bits remaining, return the last + * numTagBits bits. */ +static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) +{ + assert(numTagBits < 32 && hbits <= 32); + if (32 - hbits < numTagBits) { + return hash & (((U32)1 << numTagBits) - 1); + } else { + return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); + } +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const bucketOffsets = ldmState->bucketOffsets; + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; + bucketOffsets[hash]++; + bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; +} + +/** ZSTD_ldm_makeEntryAndInsertByTag() : + * + * Gets the small hash, checksum, and tag from the rollingHash. + * + * If the tag matches (1 << ldmParams.hashEveryLog)-1, then + * creates an ldmEntry from the offset, and inserts it into the hash table. + * + * hBits is the length of the small hash, which is the most significant hBits + * of rollingHash. The checksum is the next 32 most significant bits, followed + * by ldmParams.hashEveryLog bits that make up the tag.
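+ * (A position is inserted only when its tag equals the all-ones mask, i.e. roughly one position in 2^hashEveryLog.)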
*/ +static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, + U64 const rollingHash, + U32 const hBits, + U32 const offset, + ldmParams_t const ldmParams) +{ + U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog); + U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; + if (tag == tagMask) { + U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + ldmEntry_t entry; + entry.offset = offset; + entry.checksum = checksum; + ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); + } +} + +/** ZSTD_ldm_getRollingHash() : + * Get a 64-bit hash using the first len bytes from buf. + * + * Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be + * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0) + * + * where the constant a is defined to be prime8bytes. + * + * The implementation adds an offset to each byte, so + * H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */ +static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len) +{ + U64 ret = 0; + U32 i; + for (i = 0; i < len; i++) { + ret *= prime8bytes; + ret += buf[i] + LDM_HASH_CHAR_OFFSET; + } + return ret; +} + +/** ZSTD_ldm_ipow() : + * Return base^exp. */ +static U64 ZSTD_ldm_ipow(U64 base, U64 exp) +{ + U64 ret = 1; + while (exp) { + if (exp & 1) { ret *= base; } + exp >>= 1; + base *= base; + } + return ret; +} + +U64 ZSTD_ldm_getHashPower(U32 minMatchLength) { + DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength); + assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN); + return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1); +} + +/** ZSTD_ldm_updateHash() : + * Updates hash by removing toRemove and adding toAdd. */ +static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower) +{ + hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower); + hash *= prime8bytes; + hash += toAdd + LDM_HASH_CHAR_OFFSET; + return hash; +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. */ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + ZSTD_compressionParameters const* cParams, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(cParams->strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, cParams, iend); + ms->nextToUpdate = (U32)(iend - ms->window.base); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, cParams, iend); + ms->nextToUpdate = (U32)(iend - ms->window.base); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +/** ZSTD_ldm_fillLdmHashTable() : + * + * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). + * lastHash is the rolling hash that corresponds to lastHashed. 
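+ * The hash is advanced one byte at a time with ZSTD_ldm_updateHash().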
+ * + * Returns the rolling hash corresponding to position iend-1. */ +static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, + U64 lastHash, const BYTE* lastHashed, + const BYTE* iend, const BYTE* base, + U32 hBits, ldmParams_t const ldmParams) +{ + U64 rollingHash = lastHash; + const BYTE* cur = lastHashed + 1; + + while (cur < iend) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1], + cur[ldmParams.minMatchLength-1], + state->hashPower); + ZSTD_ldm_makeEntryAndInsertByTag(state, + rollingHash, hBits, + (U32)(cur - base), ldmParams); + ++cur; + } + return rollingHash; +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets ms->nextToUpdate to a position closer to anchor + * if it is far away + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const current = (U32)(anchor - ms->window.base); + if (current > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + current - MIN(512, current - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U64 const hashPower = ldmState->hashPower; + U32 const hBits = params->hashLog - params->bucketSizeLog; + U32 const ldmBucketSize = 1U << params->bucketSizeLog; + U32 const hashEveryLog = params->hashEveryLog; + U32 const ldmTagMask = (1U << params->hashEveryLog) - 1; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ?
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash */ + BYTE const* lastHashed = NULL; + U64 rollingHash = 0; + + while (ip <= ilimit) { + size_t mLength; + U32 const current = (U32)(ip - base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], + lastHashed[minMatchLength], + hashPower); + } else { + rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength); + } + lastHashed = ip; + + /* Do not insert and do not look for a match */ + if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) { + ip++; + continue; + } + + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits), + *params); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + + curForwardMatchLength = ZSTD_count_2segments( + ip, pMatch, iend, + matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowMatchPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, + lowPrefixPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + } + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } + + /* No match found -- continue searching */ + if (bestEntry == NULL) { + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, + hBits, current, + *params); + ip++; + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + ip -= backwardMatchLength; + + { + /* Store the sequence: + * ip = current - backwardMatchLength + * The match is at (bestEntry->offset - backwardMatchLength) + */ + U32 const matchIndex = bestEntry->offset; + U32 const offset = current - matchIndex; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(ip - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base), + *params); + + assert(ip + backwardMatchLength == lastHashed); + + /* Fill the hash table from lastHashed+1 to ip+mLength*/ + /* Heuristic: don't need to fill the entire table at end of block */ + if (ip + mLength <= ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits, *params); + lastHashed = ip + mLength - 1; + } + ip += mLength; + anchor = ip; + } + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction.
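+ * (Each chunk is at most kMaxChunkSize, i.e. 1 MB here.)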
+ */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, src); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. + */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). 
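+ * For example, with minMatch==4, a sequence {litLength==5, matchLength==10} + * and remaining==8 keeps 8-5 == 3 match bytes : that is below minMatch, so + * offset is set to 0 and the rest of the block is emitted as literals.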
*/ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, + int const extDict) +{ + unsigned const minMatch = cParams->searchLength; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, extDict); + BYTE const* const base = ms->window.base; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the lits */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(sequence.offset <= (1U << cParams->windowLog)); + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, cParams, ip); + /* Run the block compressor */ + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, cParams, ip, + sequence.litLength); + ip += sequence.litLength; + ms->nextToUpdate = (U32)(ip - base); + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, cParams, ip); + /* Compress the last literals */ + { + size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams, + ip, iend - ip); + ms->nextToUpdate = (U32)(iend - base); + return lastLiterals; + } +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.h b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.h new file mode 100644 index 0000000..0c3789f --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_ldm.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ */
+
+#ifndef ZSTD_LDM_H
+#define ZSTD_LDM_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include "zstd_compress_internal.h"   /* ldmParams_t, U32 */
+#include "zstd.h"   /* ZSTD_CCtx, size_t */
+
+/*-*************************************
+*  Long distance matching
+***************************************/
+
+#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
+
+/**
+ * ZSTD_ldm_generateSequences():
+ *
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
+ *
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ *       sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+            ldmState_t* ldms, rawSeqStore_t* sequences,
+            ldmParams_t const* params, void const* src, size_t srcSize);
+
+/**
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            ZSTD_compressionParameters const* cParams,
+            void const* src, size_t srcSize,
+            int const extDict);
+
+/**
+ * ZSTD_ldm_skipSequences():
+ *
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data which is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+    U32 const minMatch);
+
+
+/** ZSTD_ldm_getTableSize() :
+ *  Estimate the space needed for long distance matching tables or 0 if LDM is
+ *  disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/** ZSTD_ldm_getMaxNbSeq() :
+ *  Return an upper bound on the number of sequences that can be produced by
+ *  the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
+
+/** ZSTD_ldm_getHashPower() :
+ *  Return prime8bytes^(minMatchLength-1) */
+U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
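The doc comments above describe a two-phase contract: generate all raw sequences once (after the window has been updated for the whole input), then replay them block by block. The following sketch is editorial illustration only, not part of the imported sources; the hypothetical wrapper name `example_ldm_compress_block` and the assumption that all state objects are initialized elsewhere are mine, and error handling is elided.

/* Illustrative only: driving the two-phase LDM API declared above. */
static size_t example_ldm_compress_block(ldmState_t* ldms, rawSeqStore_t* seqs,
                                         ldmParams_t const* params,
                                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
                                         U32 rep[ZSTD_REP_NUM],
                                         ZSTD_compressionParameters const* cParams,
                                         void const* src, size_t srcSize)
{
    /* Phase 1: generate raw sequences for a prefix of the source.
     * ZSTD_window_update() must already cover all of `src`. */
    size_t const genErr = ZSTD_ldm_generateSequences(ldms, seqs, params, src, srcSize);
    if (ZSTD_isError(genErr)) return genErr;
    /* Phase 2: replay the predefined sequences over this block; the literal
     * gaps go through the secondary block compressor. Returns the length of
     * the last literals. */
    return ZSTD_ldm_blockCompress(seqs, ms, seqStore, rep, cParams,
                                  src, srcSize, 0 /* extDict */);
}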
+/** ZSTD_ldm_adjustParameters() :
+ *  If the params->hashEveryLog is not set, set it to its default value based on
+ *  windowLog and params->hashLog.
+ *
+ *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
+ *  params->hashLog if it is not).
+ *
+ *  Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_LDM_H */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.c
new file mode 100644
index 0000000..f63f0c5
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.c
@@ -0,0 +1,923 @@
+/*
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "zstd_opt.h"
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE      (1<<30)
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+static void ZSTD_setLog2Prices(optState_t* optPtr)
+{
+    optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
+    optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
+    optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
+    optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
+}
+
+
+static void ZSTD_rescaleFreqs(optState_t* const optPtr,
+                              const BYTE* const src, size_t const srcSize)
+{
+    optPtr->staticPrices = 0;
+
+    if (optPtr->litLengthSum == 0) {  /* first init */
+        unsigned u;
+        if (srcSize <= 1024) optPtr->staticPrices = 1;
+
+        assert(optPtr->litFreq!=NULL);
+        for (u=0; u<=MaxLit; u++)
+            optPtr->litFreq[u] = 0;
+        for (u=0; u<srcSize; u++)
+            optPtr->litFreq[src[u]]++;
+        optPtr->litSum = 0;
+        for (u=0; u<=MaxLit; u++) {
+            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
+            optPtr->litSum += optPtr->litFreq[u];
+        }
+
+        for (u=0; u<=MaxLL; u++)
+            optPtr->litLengthFreq[u] = 1;
+        optPtr->litLengthSum = MaxLL+1;
+        for (u=0; u<=MaxML; u++)
+            optPtr->matchLengthFreq[u] = 1;
+        optPtr->matchLengthSum = MaxML+1;
+        for (u=0; u<=MaxOff; u++)
+            optPtr->offCodeFreq[u] = 1;
+        optPtr->offCodeSum = (MaxOff+1);
+
+    } else {
+        unsigned u;
+
+        optPtr->litSum = 0;
+        for (u=0; u<=MaxLit; u++) {
+            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
+            optPtr->litSum += optPtr->litFreq[u];
+        }
+        optPtr->litLengthSum = 0;
+        for (u=0; u<=MaxLL; u++) {
+            optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
+            optPtr->litLengthSum += optPtr->litLengthFreq[u];
+        }
+        optPtr->matchLengthSum = 0;
+        for (u=0; u<=MaxML; u++) {
+            optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
+        }
+        optPtr->offCodeSum = 0;
+        for (u=0; u<=MaxOff; u++) {
+            optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->offCodeSum += optPtr->offCodeFreq[u];
+        }
+    }
+
+    ZSTD_setLog2Prices(optPtr);
+}
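The price functions that follow approximate the Shannon cost -log2(freq/sum) of a symbol with pure integer arithmetic, as highbit(sum+1) - highbit(freq+1). A self-contained toy version, editorial illustration only (the `toy_` names are mine, not from the imported sources):

#include <stdint.h>

/* position of the highest set bit, i.e. floor(log2(v)) for v > 0 */
static unsigned toy_highbit32(uint32_t v)
{
    unsigned n = 0;
    while (v >>= 1) n++;
    return n;
}

/* approximate cost, in bits, of a symbol seen `freq` times out of `sum` */
static unsigned toy_symbolPrice(uint32_t freq, uint32_t sum)
{
    return toy_highbit32(sum + 1) - toy_highbit32(freq + 1);
}
/* e.g. toy_symbolPrice(1, 256) == 7 (rare symbol, ~7 bits), while
 * toy_symbolPrice(128, 256) == 1 (frequent symbol, ~1 bit). */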
+/* ZSTD_rawLiteralsCost() :
+ * cost of literals (only) in given segment (whose length can be zero)
+ * does not include cost of literalLength symbol */
+static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
+                                const optState_t* const optPtr)
+{
+    if (optPtr->staticPrices) return (litLength*6);  /* 6 bit per literal - no statistic used */
+    if (litLength == 0) return 0;
+
+    /* literals */
+    {   U32 u;
+        U32 cost = litLength * optPtr->log2litSum;
+        for (u=0; u < litLength; u++)
+            cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
+        return cost;
+    }
+}
+
+/* ZSTD_litLengthPrice() :
+ * cost of literalLength symbol */
+static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
+{
+    if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
+
+    /* literal Length */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
+        return price;
+    }
+}
+
+/* ZSTD_fullLiteralsCost() :
+ * cost of the literal part of a sequence,
+ * including literals themselves, and literalLength symbol */
+static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
+                                 const optState_t* const optPtr)
+{
+    return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
+         + ZSTD_litLengthPrice(litLength, optPtr);
+}
+
+/* ZSTD_litLengthContribution() :
+ * @return ( cost(litlength) - cost(0) )
+ * this value can then be added to rawLiteralsCost()
+ * to provide a cost which is directly comparable to a match ending at same position */
+static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
+{
+    if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
+
+    /* literal Length */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        int const contribution = LL_bits[llCode]
+                        + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
+                        - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
+#if 1
+        return contribution;
+#else
+        return MAX(0, contribution); /* sometimes better, sometimes not ... */
+#endif
+    }
+}
+
+/* ZSTD_literalsContribution() :
+ * creates a fake cost for the literals part of a sequence
+ * which can be compared to the ending cost of a match
+ * should a new match start at this position */
+static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
+                                     const optState_t* const optPtr)
+{
+    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
+                           + ZSTD_litLengthContribution(litLength, optPtr);
+    return contribution;
+}
+
+/* ZSTD_getMatchPrice() :
+ * Provides the cost of the match part (offset + matchLength) of a sequence
+ * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice( + U32 const offset, U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->staticPrices) /* fixed scheme, do not use statistics */ + return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode; + + price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); + if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1); + } + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + { U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = ms->nextToUpdate3; + U32 const target = ms->nextToUpdate3 = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . 
+ * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, U32 const extDict) +{ + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = current+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + 
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > current + 8); + return matchEndIdx - (current + 8); +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const U32 extDict) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)", + idx, target, extDict); + + while(idx < target) + idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* iend) +{ + ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* const ip, const BYTE* const iLimit, int const extDict, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) +{ + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const current = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = btMask >= current ? 
0 : current - btMask; + U32 const windowLow = ms->window.lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches"); + + /* check repCode */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = current - repOffset; + U32 repLen = 0; + assert(current >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ + if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= current */ + const BYTE* const repMatch = dictBase + repIndex; + assert(current >= windowLow); + if ( extDict /* this case only valid in extDict mode */ + && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found rep-match %u of length %u", + repCode - ll0, (U32)repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); + if ((matchIndex3 > windowLow) + & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(current > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = current+1; /* skip insertion */ + return 1; + } } } } + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + assert(current > matchIndex); + + if ((!extDict) || 
(matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u", + (U32)matchLength, current - matchIndex); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if (matchLength > ZSTD_OPT_NUM) break; + if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + assert(matchEndIdx > current+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams, + const BYTE* ip, const BYTE* const iHighLimit, int const extDict, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, U32 const lengthToBeat) +{ + U32 const matchLengthSearch = cParams->searchLength; + DEBUGLOG(7, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6); + } +} + + +/*-******************************* +* Optimal parser +*********************************/ +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + 
U32 const repCode = offset + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            newReps.rep[1] = rep[0];
+            newReps.rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            memcpy(&newReps, rep, sizeof(newReps));
+        }
+    }
+    return newReps;
+}
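+/* Worked example (editorial illustration, not upstream code) of the repcode
+ * update just defined. Starting from rep = {8, 16, 24}:
+ *   - a full offset (offset >= ZSTD_REP_NUM): offset==102 stores
+ *     102 - ZSTD_REP_MOVE == 100 and shifts the history -> {100, 8, 16};
+ *   - repCode 1 (second most recent repeat offset) -> {16, 8, 24};
+ *   - repCode 2 -> {24, 8, 16};
+ *   - repCode ZSTD_REP_NUM (the rep[0]-1 special case, only reachable when
+ *     litLength==0) -> {7, 8, 16}.
+ * repCode 0 leaves the history unchanged. */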
+
+
+typedef struct {
+    const BYTE* anchor;
+    U32 litlen;
+    U32 rawLitCost;
+} cachedLiteralPrice_t;
+
+static U32 ZSTD_rawLiteralsCost_cached(
+                            cachedLiteralPrice_t* const cachedLitPrice,
+                            const BYTE* const anchor, U32 const litlen,
+                            const optState_t* const optStatePtr)
+{
+    U32 startCost;
+    U32 remainingLength;
+    const BYTE* startPosition;
+
+    if (anchor == cachedLitPrice->anchor) {
+        startCost = cachedLitPrice->rawLitCost;
+        startPosition = anchor + cachedLitPrice->litlen;
+        assert(litlen >= cachedLitPrice->litlen);
+        remainingLength = litlen - cachedLitPrice->litlen;
+    } else {
+        startCost = 0;
+        startPosition = anchor;
+        remainingLength = litlen;
+    }
+
+    {   U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
+        cachedLitPrice->anchor = anchor;
+        cachedLitPrice->litlen = litlen;
+        cachedLitPrice->rawLitCost = rawLitCost;
+        return rawLitCost;
+    }
+}
+
+static U32 ZSTD_fullLiteralsCost_cached(
+                            cachedLiteralPrice_t* const cachedLitPrice,
+                            const BYTE* const anchor, U32 const litlen,
+                            const optState_t* const optStatePtr)
+{
+    return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
+         + ZSTD_litLengthPrice(litlen, optStatePtr);
+}
+
+static int ZSTD_literalsContribution_cached(
+                            cachedLiteralPrice_t* const cachedLitPrice,
+                            const BYTE* const anchor, U32 const litlen,
+                            const optState_t* const optStatePtr)
+{
+    int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
+                           + ZSTD_litLengthContribution(litlen, optStatePtr);
+    return contribution;
+}
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                                      U32 rep[ZSTD_REP_NUM],
+                                      ZSTD_compressionParameters const* cParams,
+                                      const void* src, size_t srcSize,
+                                      const int optLevel, const int extDict)
+{
+    optState_t* const optStatePtr = &ms->opt;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
+
+    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
+    ZSTD_match_t* const matches = optStatePtr->matchTable;
+    cachedLiteralPrice_t cachedLitPrice;
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
+    ms->nextToUpdate3 = ms->nextToUpdate;
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
+    ip += (ip==prefixStart);
+    memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 cur, last_pos = 0;
+        U32 best_mlen, best_off;
+
+        /* find first match */
+        {   U32 const litlen = (U32)(ip - anchor);
+            U32 const ll0 = !litlen;
+            U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
+            if (!nbMatches) { ip++; continue; }
+
+            /* initialize opt[0] */
+            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+            opt[0].mlen = 1;
+            opt[0].litlen = litlen;
+
+            /* large match -> immediate encoding */
+            {   U32 const maxML = matches[nbMatches-1].len;
+                DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new series",
+                            nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
+
+                if (maxML > sufficient_len) {
+                    best_mlen = maxML;
+                    best_off = matches[nbMatches-1].off;
+                    DEBUGLOG(7, "large match (%u>%u), immediate encoding",
+                                best_mlen, sufficient_len);
+                    cur = 0;
+                    last_pos = 1;
+                    goto _shortestPath;
+            }   }
+
+            /* set prices for first matches starting position == 0 */
+            {   U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
+                U32 pos;
+                U32 matchNb;
+                for (pos = 0; pos < minMatch; pos++) {
+                    opt[pos].mlen = 1;
+                    opt[pos].price = ZSTD_MAX_PRICE;
+                }
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const end = matches[matchNb].len;
+                    repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
+                    for ( ; pos <= end ; pos++ ) {
+                        U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        DEBUGLOG(7, "rPos:%u => set initial price : %u",
+                                    pos, matchPrice);
+                        opt[pos].mlen = pos;
+                        opt[pos].off = offset;
+                        opt[pos].litlen = litlen;
+                        opt[pos].price = matchPrice;
+                        memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
+                }   }
+                last_pos = pos-1;
+            }
+        }
+
+        /* check further positions */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const inr = ip + cur;
+            assert(cur < ZSTD_OPT_NUM);
+
+            /* Fix current position with one literal if cheaper */
+            {   U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
+                int price;  /* note : contribution can be negative */
+                if (cur > litlen) {
+                    price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
+                } else {
+                    price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
+                }
+                assert(price < 1000000000); /* overflow check */
+                if (price <= opt[cur].price) {
+                    DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
+                                cur, price, opt[cur].price);
+                    opt[cur].mlen = 1;
+                    opt[cur].off = 0;
+                    opt[cur].litlen = litlen;
+                    opt[cur].price = price;
+                    memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
+            }   }
+
+            /* last match must start at a minimum distance of 8 from oend */
+            if (inr > ilimit) continue;
+
+            if (cur == last_pos) break;
+
+            if ( (optLevel==0) /*static*/
+              && (opt[cur+1].price <= opt[cur].price) )
+                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
+
+            {   U32 const ll0 = (opt[cur].mlen != 1);
+                U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
+                U32 const previousPrice = (cur > litlen) ?
opt[cur-litlen].price : 0; + U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr); + U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch); + U32 matchNb; + if (!nbMatches) continue; + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u", + cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + | (cur + maxML >= ZSTD_OPT_NUM) ) { + best_mlen = maxML; + best_off = matches[nbMatches-1].off; + last_pos = cur + 1; + goto _shortestPath; + } + } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u => new better price (%u<%u)", + pos, price, opt[pos].price); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); + } else { + if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 1); + + /* reverse traversal */ + DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); + { U32 selectedMatchLength = best_mlen; + U32 selectedOffset = best_off; + U32 pos = cur; + while (1) { + U32 const mlen = opt[pos].mlen; + U32 const off = opt[pos].off; + opt[pos].mlen = selectedMatchLength; + opt[pos].off = selectedOffset; + selectedMatchLength = mlen; + selectedOffset = off; + if (mlen > pos) break; + pos -= mlen; + } } + + /* save sequences */ + { U32 pos; + for (pos=0; pos < last_pos; ) { + U32 const llen = (U32)(ip - anchor); + U32 const mlen = opt[pos].mlen; + U32 const offset = opt[pos].off; + if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */ + pos += mlen; ip += mlen; + + /* repcodes update : like ZSTD_updateRep(), but update in place */ + if (offset >= ZSTD_REP_NUM) { /* full offset */ + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + (llen==0); + if (repCode) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? 
(rep[0] - 1) : rep[repCode];
+                        if (repCode >= 2) rep[2] = rep[1];
+                        rep[1] = rep[0];
+                        rep[0] = currentOffset;
+                    }
+                }
+
+                ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
+                ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
+                anchor = ip;
+        }   }
+        ZSTD_setLog2Prices(optStatePtr);
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return iend - anchor;
+}
+
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
+}
+
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
+}
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
+}
+
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
+}
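The four wrappers above all funnel into one force-inlined "template" whose `optLevel` and `extDict` arguments are compile-time constants at each call site, so the compiler specializes the body and removes the dead branches. A generic sketch of the pattern, editorial illustration only (the `template_fn`/`variant_*` names are mine):

/* Passing constant flags into a force-inlined function yields one
 * specialized, branch-free body per wrapper. */
static inline int template_fn(int x, int const flagA, int const flagB)
{
    if (flagA) x += 1;   /* eliminated when the call site passes a constant 0 */
    if (flagB) x *= 2;
    return x;
}
static int variant_00(int x) { return template_fn(x, 0, 0); }
static int variant_11(int x) { return template_fn(x, 1, 1); }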
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.h b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.h
new file mode 100644
index 0000000..b8dc389
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstd_opt.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_OPT_H
+#define ZSTD_OPT_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include "zstd_compress_internal.h"
+
+void ZSTD_updateTree(
+        ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+        const BYTE* ip, const BYTE* iend);  /* used in ZSTD_loadDictionaryContent() */
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_OPT_H */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.c b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.c
new file mode 100644
index 0000000..c7a205d
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.c
@@ -0,0 +1,1831 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ======   Tuning parameters   ====== */
+#define ZSTDMT_NBWORKERS_MAX 200
+#define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (2 GB))  /* note : limited by `jobSize` type, which is `unsigned` */
+#define ZSTDMT_OVERLAPLOG_DEFAULT 6
+
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+/* ======   Dependencies   ====== */
+#include <string.h>      /* memcpy, memset */
+#include <limits.h>      /* INT_MAX */
+#include "pool.h"        /* threadpool */
+#include "threading.h"   /* mutex */
+#include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_ldm.h"
+#include "zstdmt_compress.h"
+
+/* Guards code to support resizing the SeqPool.
+ * We will want to resize the SeqPool to save memory in the future.
+ * Until then, comment the code out since it is unused.
+ */
+#define ZSTD_RESIZE_SEQPOOL 0
+
+/* ======   Debug   ====== */
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+
+#  include <stdio.h>
+#  include <unistd.h>
+#  include <sys/times.h>
+#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
+
+#  define DEBUG_PRINTHEX(l,p,n) {            \
+    unsigned debug_u;                        \
+    for (debug_u=0; debug_u<(n); debug_u++)  \
+        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+    DEBUGLOGRAW(l, " \n");                   \
+}
+
+static unsigned long long GetCurrentClockTimeMicroseconds(void)
+{
+   static clock_t _ticksPerSecond = 0;
+   if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
+
+   {   struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+       return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
+}
+
+#define MUTEX_WAIT_TIME_DLEVEL 6
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
+    if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) {   \
+        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+        ZSTD_pthread_mutex_lock(mutex);           \
+        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+            unsigned long long const elapsedTime = (afterTime-beforeTime); \
+            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
+                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+                    elapsedTime, #mutex);         \
+        }   }                                     \
+    } else {                                      \
+        ZSTD_pthread_mutex_lock(mutex);           \
+    }                                             \
+}
+
+#else
+
+#  define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
+#  define DEBUG_PRINTHEX(l,p,n) {}
+
+#endif
+
+
+/* =====   Buffer Pool   ===== */
+/* a single Buffer Pool can be invoked from multiple threads in parallel */
+
+typedef struct buffer_s {
+    void* start;
+    size_t capacity;
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 };
+
+typedef struct ZSTDMT_bufferPool_s {
+    ZSTD_pthread_mutex_t poolMutex;
+    size_t bufferSize;
+    unsigned totalBuffers;
+    unsigned nbBuffers;
+    ZSTD_customMem cMem;
+    buffer_t bTable[1];   /* variable size */
+} ZSTDMT_bufferPool;
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+    unsigned const maxNbBuffers = 2*nbWorkers + 3;
+    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
+        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+    if (bufPool==NULL) return NULL;
+    if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
+        ZSTD_free(bufPool, cMem);
+        return NULL;
+    }
+    bufPool->bufferSize = 64 KB;
+    bufPool->totalBuffers = maxNbBuffers;
+    bufPool->nbBuffers = 0;
+    bufPool->cMem = cMem;
+    return bufPool;
+}
+
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    unsigned u;
+    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    for (u=0; u<bufPool->totalBuffers; u++) {
+        DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
+        ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+    ZSTD_free(bufPool, bufPool->cMem);
+}
+
+/* only works at initialization, not during compression */
+static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const poolSize = sizeof(*bufPool)
+                          + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+    unsigned u;
+    size_t totalBufferSize = 0;
+    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+    for (u=0; u<bufPool->totalBuffers; u++)
+        totalBufferSize += bufPool->bTable[u].capacity;
+    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+
+    return poolSize + totalBufferSize;
+}
+
+/* ZSTDMT_setBufferSize() :
+ * all future buffers provided by this buffer pool will have _at least_ this size
+ * note : it's better for all buffers to have same size,
+ * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
+static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
+{
+    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+    DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize);
+    bufPool->bufferSize = bSize;
+    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+}
+
+/** ZSTDMT_getBuffer() :
+ *  assumption : bufPool must be valid
+ * @return : a buffer, with start pointer and size
+ *  note: allocation may fail, in this case, start==NULL and size==0 */
+static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const bSize = bufPool->bufferSize;
+    DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
+    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+    if (bufPool->nbBuffers) {   /* try to use an existing buffer */
+        buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+        size_t const availBufferSize = buf.capacity;
+        bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+        if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
+            /* large enough, but not too much */
+            DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
+                        bufPool->nbBuffers, (U32)buf.capacity);
+            ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+            return buf;
+        }
+        /* size conditions not respected : scratch this buffer, create new one */
+        DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
+        ZSTD_free(buf.start, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+    /* create new buffer */
+    DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
+    {   buffer_t buffer;
+        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
+        buffer.start = start;   /* note : start can be NULL if malloc fails ! */
+        buffer.capacity = (start==NULL) ? 0 : bSize;
+        if (start==NULL) {
+            DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!");
+        } else {
+            DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
+        }
+        return buffer;
+    }
+}
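The reuse test in ZSTDMT_getBuffer above recycles a pooled buffer only when it is large enough for the request yet not more than 8x larger, so the pool does not hoard oversized allocations. Restated as a standalone predicate, editorial illustration only (the `toy_bufferFits` name is mine):

#include <stddef.h>

/* large enough, but not too much: requested <= capacity <= 8 * requested */
static int toy_bufferFits(size_t capacity, size_t requested)
{
    return (capacity >= requested) && ((capacity >> 3) <= requested);
}
/* toy_bufferFits(65536, 65536) -> 1 (exact fit is reused);
 * toy_bufferFits(1048576, 65536) -> 0, since 1 MB / 8 = 128 KB > 64 KB,
 * so the oversized buffer is freed and a fresh one allocated instead. */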
+
+#if ZSTD_RESIZE_SEQPOOL
+/** ZSTDMT_resizeBuffer() :
+ * assumption : bufPool must be valid
+ * @return : a buffer that is at least the buffer pool buffer size.
+ *           If a reallocation happens, the data in the input buffer is copied.
+ */
+static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
+{
+    size_t const bSize = bufPool->bufferSize;
+    if (buffer.capacity < bSize) {
+        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
+        buffer_t newBuffer;
+        newBuffer.start = start;
+        newBuffer.capacity = start == NULL ? 0 : bSize;
+        if (start != NULL) {
+            assert(newBuffer.capacity >= buffer.capacity);
+            memcpy(newBuffer.start, buffer.start, buffer.capacity);
+            DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
+            return newBuffer;
+        }
+        DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
+    }
+    return buffer;
+}
+#endif
+
+/* store buffer for later re-use, up to pool capacity */
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
+{
+    if (buf.start == NULL) return;   /* compatible with release on NULL */
+    DEBUGLOG(5, "ZSTDMT_releaseBuffer");
+    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+    if (bufPool->nbBuffers < bufPool->totalBuffers) {
+        bufPool->bTable[bufPool->nbBuffers++] = buf;   /* stored for later use */
+        DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
+                    (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
+        ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+        return;
+    }
+    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+    /* Reached bufferPool capacity (should not happen) */
+    DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
+    ZSTD_free(buf.start, bufPool->cMem);
+}
+
+
+/* =====   Seq Pool Wrapper   ====== */
+
+static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
+
+typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
+
+static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
+{
+    return ZSTDMT_sizeof_bufferPool(seqPool);
+}
+
+static rawSeqStore_t bufferToSeq(buffer_t buffer)
+{
+    rawSeqStore_t seq = {NULL, 0, 0, 0};
+    seq.seq = (rawSeq*)buffer.start;
+    seq.capacity = buffer.capacity / sizeof(rawSeq);
+    return seq;
+}
+
+static buffer_t seqToBuffer(rawSeqStore_t seq)
+{
+    buffer_t buffer;
+    buffer.start = seq.seq;
+    buffer.capacity = seq.capacity * sizeof(rawSeq);
+    return buffer;
+}
+
+static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
+{
+    if (seqPool->bufferSize == 0) {
+        return kNullRawSeqStore;
+    }
+    return bufferToSeq(ZSTDMT_getBuffer(seqPool));
+}
+
+#if ZSTD_RESIZE_SEQPOOL
+static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+    return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
+}
+#endif
+
+static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+    ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
+}
+
+static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
+{
+    ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
+}
+
+static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+    ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    ZSTDMT_setNbSeq(seqPool, 0);
+    return seqPool;
+}
+
+static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
+{
+    ZSTDMT_freeBufferPool(seqPool);
+}
+
+
+
+/* =====   CCtx Pool   ===== */
+/* a single CCtx Pool can be invoked from multiple threads in parallel */
+
+typedef struct {
+    ZSTD_pthread_mutex_t poolMutex;
+    unsigned totalCCtx;
+    unsigned availCCtx;
+    ZSTD_customMem cMem;
+    ZSTD_CCtx* cctx[1];   /* variable size */
+} ZSTDMT_CCtxPool;
+
+/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
+{
+    unsigned u;
+    for (u=0; u<pool->totalCCtx; u++)
+        ZSTD_freeCCtx(pool->cctx[u]);  /* note : compatible with free on NULL */
+    ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+    ZSTD_free(pool, pool->cMem);
+}
+
+/* ZSTDMT_createCCtxPool() :
+ * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
+                                              ZSTD_customMem cMem)
+{
+    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
+        sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+    assert(nbWorkers > 0);
+    if (!cctxPool) return NULL;
+    if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
+        ZSTD_free(cctxPool, cMem);
+        return NULL;
+    }
+    cctxPool->cMem = cMem;
+    cctxPool->totalCCtx = nbWorkers;
+    cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
+    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
+    DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
+    return cctxPool;
+}
+
+/* only works during initialization phase, not during compression */
+static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
+{
+    ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+    {   unsigned const nbWorkers = cctxPool->totalCCtx;
+        size_t const poolSize = sizeof(*cctxPool)
+                                + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
+        unsigned u;
+        size_t totalCCtxSize = 0;
+        for (u=0; u<nbWorkers; u++) {
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+        }
+        ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+        assert(nbWorkers > 0);
+        return poolSize + totalCCtxSize;
+    }
+}
+
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
+{
+    DEBUGLOG(5, "ZSTDMT_getCCtx");
+    ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+    if (cctxPool->availCCtx) {
+        cctxPool->availCCtx--;
+        {   ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+            ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+            return cctx;
+    }   }
+    ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+    DEBUGLOG(5, "create one more CCtx");
+    return ZSTD_createCCtx_advanced(cctxPool->cMem);   /* note : can be NULL, when creation fails ! */
+}
+
+static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return;   /* compatibility with release on NULL */
+    ZSTD_pthread_mutex_lock(&pool->poolMutex);
+    if (pool->availCCtx < pool->totalCCtx)
+        pool->cctx[pool->availCCtx++] = cctx;
+    else {
+        /* pool overflow : should not happen, since totalCCtx==nbWorkers */
+        DEBUGLOG(4, "CCtx pool overflow : free cctx");
+        ZSTD_freeCCtx(cctx);
+    }
+    ZSTD_pthread_mutex_unlock(&pool->poolMutex);
+}
+
+/* ====   Serial State   ==== */
+
+typedef struct {
+    void const* start;
+    size_t size;
+} range_t;
+
+typedef struct {
+    /* All variables in the struct are protected by mutex. */
+    ZSTD_pthread_mutex_t mutex;
+    ZSTD_pthread_cond_t cond;
+    ZSTD_CCtx_params params;
+    ldmState_t ldmState;
+    XXH64_state_t xxhState;
+    unsigned nextJobID;
+    /* Protects ldmWindow.
+     * Must be acquired after the main mutex when acquiring both.
+     */
+    ZSTD_pthread_mutex_t ldmWindowMutex;
+    ZSTD_pthread_cond_t ldmWindowCond;  /* Signaled when ldmWindow is updated */
+    ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
+} serialState_t;
+
+static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
+{
+    /* Adjust parameters */
+    if (params.ldmParams.enableLdm) {
+        DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
+        params.ldmParams.windowLog = params.cParams.windowLog;
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashEveryLog < 32);
+        serialState->ldmState.hashPower =
+                ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
+    } else {
+        memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+    }
+    serialState->nextJobID = 0;
+    if (params.fParams.checksumFlag)
+        XXH64_reset(&serialState->xxhState, 0);
+    if (params.ldmParams.enableLdm) {
+        ZSTD_customMem cMem = params.customMem;
+        unsigned const hashLog = params.ldmParams.hashLog;
+        size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
+        unsigned const bucketLog =
+            params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
+        size_t const bucketSize = (size_t)1 << bucketLog;
+        unsigned const prevBucketLog =
+            serialState->params.ldmParams.hashLog -
+            serialState->params.ldmParams.bucketSizeLog;
+        /* Size the seq pool tables */
+        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
+        /* Reset the window */
+        ZSTD_window_clear(&serialState->ldmState.window);
+        serialState->ldmWindow = serialState->ldmState.window;
+        /* Resize tables and output space if necessary. */
+        if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
+            ZSTD_free(serialState->ldmState.hashTable, cMem);
+            serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
+        }
+        if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
+            ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
+        }
+        if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
+            return 1;
+        /* Zero the tables */
+        memset(serialState->ldmState.hashTable, 0, hashSize);
+        memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+    }
+    serialState->params = params;
+    return 0;
+}
+
+static int ZSTDMT_serialState_init(serialState_t* serialState)
+{
+    int initError = 0;
+    memset(serialState, 0, sizeof(*serialState));
+    initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
+    initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
+    initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
+    initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL);
+    return initError;
+}
+
+static void ZSTDMT_serialState_free(serialState_t* serialState)
+{
+    ZSTD_customMem cMem = serialState->params.customMem;
+    ZSTD_pthread_mutex_destroy(&serialState->mutex);
+    ZSTD_pthread_cond_destroy(&serialState->cond);
+    ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
+    ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
+    ZSTD_free(serialState->ldmState.hashTable, cMem);
+    ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+}
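ZSTDMT_serialState_update below implements a classic ticket scheme: each worker waits on a condition variable until the shared counter reaches its own job ID, performs the order-dependent work (LDM, checksum), then advances the counter and broadcasts. A plain-pthreads toy version of the same protocol, editorial illustration only (the `toy_` names are mine):

#include <pthread.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    unsigned nextJobID;   /* ID of the job whose turn it is */
} toy_serial_t;

static void toy_serial_run(toy_serial_t* s, unsigned jobID,
                           void (*work)(void*), void* arg)
{
    pthread_mutex_lock(&s->mutex);
    while (s->nextJobID < jobID)                 /* wait for our turn */
        pthread_cond_wait(&s->cond, &s->mutex);
    if (s->nextJobID == jobID)                   /* skipped if an earlier job errored past us */
        work(arg);                               /* serial work runs in job order */
    s->nextJobID++;                              /* hand over to the next job */
    pthread_cond_broadcast(&s->cond);
    pthread_mutex_unlock(&s->mutex);
}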
*/ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + &serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. + */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next job's turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + size_t const err = ZSTD_referenceExternalSequences( + jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no
barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by mtctx */ +} ZSTDMT_jobDescription; + +/* ZSTDMT_compressionJob() is a POOL_function type */ +void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = 0; + + /* resources */ + if (cctx==NULL) { + job->cSize = ERROR(memory_allocation); + goto _endJob; + } + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) { + job->cSize = ERROR(memory_allocation); + goto _endJob; + } + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ?
job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) { + job->cSize = forceWindowError; + goto _endJob; + } } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + NULL, /*cdict*/ + jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) { + job->cSize = initError; + goto _endJob; + } } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; } + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; } + ip += chunkSize; + op += cSize; assert(op < oend); + /* stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + job->consumed = chunkSize * chunkNb; + DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */ + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } + /* last block */ + assert(chunkSize > 0); assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */ + if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) { + size_t const lastBlockSize1 = job->src.size & (chunkSize-1); + size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1; + size_t const cSize = (job->lastJob) ? 
+ ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) : + ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize); + if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; } + /* stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } } + +_endJob: + ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); + if (job->prefix.size > 0) + DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start); + DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start); + /* release resources */ + ZSTDMT_releaseSeq(job->seqPool, rawSeqStore); + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + /* report */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->consumed = job->src.size; + ZSTD_pthread_cond_signal(&job->job_cond); + ZSTD_pthread_mutex_unlock(&job->job_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +typedef struct { + range_t prefix; /* read-only non-owned prefix buffer */ + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { + BYTE* buffer; /* The round input buffer. All jobs get references + * to pieces of the buffer. ZSTDMT_tryGetInputRange() + * handles handing out job input buffers, and makes + * sure it doesn't overlap with any pieces still in use. + */ + size_t capacity; /* The capacity of buffer. */ + size_t pos; /* The position of the current inBuff in the round + * buffer. Updated past the end of the inBuff once + * it is sent to the worker thread. + * pos <= capacity. + */ +} roundBuff_t; + +static const roundBuff_t kNullRoundBuff = {NULL, 0, 0}; + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_jobDescription* jobs; + ZSTDMT_bufferPool* bufPool; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_seqPool* seqPool; + ZSTD_CCtx_params params; + size_t targetSectionSize; + size_t targetPrefixSize; + roundBuff_t roundBuff; + inBuff_t inBuff; + int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one.
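The compression loop in ZSTDMT_compressionJob() above cuts each job into chunks of 4*ZSTD_BLOCKSIZE_MAX bytes (ZSTD_BLOCKSIZE_MAX is 128 KB) and derives the final chunk length with a mask, which is only valid because the chunk size is a power of two. A small self-contained check of that arithmetic (illustrative only; the test sizes are arbitrary):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

int main(void)
{
    size_t const chunkSize = 4 * (size_t)(128 * 1024);   /* stands in for 4*ZSTD_BLOCKSIZE_MAX */
    size_t const sizes[] = { 0, 1, chunkSize - 1, chunkSize, chunkSize + 1, 5 * chunkSize };
    size_t i;
    assert((chunkSize & (chunkSize - 1)) == 0);           /* power of 2, as asserted above */
    for (i = 0; i < sizeof(sizes)/sizeof(sizes[0]); i++) {
        size_t const srcSize = sizes[i];
        int const nbChunks = (int)((srcSize + (chunkSize-1)) / chunkSize);   /* ceiling division */
        size_t const rem = srcSize & (chunkSize-1);                          /* == srcSize % chunkSize */
        size_t const lastBlockSize = ((rem==0) & (srcSize>=chunkSize)) ? chunkSize : rem;
        /* full chunks cover chunkNb = 1 .. nbChunks-1 ; the final write emits lastBlockSize */
        assert((size_t)(nbChunks > 0 ? nbChunks - 1 : 0) * chunkSize + lastBlockSize == srcSize);
        printf("srcSize=%zu -> nbChunks=%d lastBlock=%zu\n", srcSize, nbChunks, lastBlockSize);
    }
    return 0;
}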
*/ + serialState_t serial; + unsigned singleBlockingThread; + unsigned jobIDMask; + unsigned doneJobID; + unsigned nextJobID; + unsigned frameEnded; + unsigned allJobsCompleted; + unsigned long long frameContentSize; + unsigned long long consumed; + unsigned long long produced; + ZSTD_customMem cMem; + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; +}; + +static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem) +{ + U32 jobNb; + if (jobTable == NULL) return; + for (jobNb=0; jobNb<nbJobs; jobNb++) { + ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex); + ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond); + } + ZSTD_free(jobTable, cMem); +} + +/* ZSTDMT_allocJobsTable() + * allocate and init a job table. + * update *nbJobsPtr to next power of 2 value, as size of table */ +static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem) +{ + U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1; + U32 const nbJobs = 1 << nbJobsLog2; + U32 jobNb; + ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*) + ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem); + int initError = 0; + if (jobTable==NULL) return NULL; + *nbJobsPtr = nbJobs; + for (jobNb=0; jobNb<nbJobs; jobNb++) { + initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL); + initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL); + } + if (initError != 0) { + ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem); + return NULL; + } + return jobTable; +} + +/* ZSTDMT_CCtxParam_setNbWorkers(): + * Internal use only */ +size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) +{ + if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX; + params->nbWorkers = nbWorkers; + params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT; + params->jobSize = 0; + return nbWorkers; +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) +{ + ZSTDMT_CCtx* mtctx; + U32 nbJobs = nbWorkers + 2; + int initError; + DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers); + + if (nbWorkers < 1) return NULL; + nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX); + if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL)) + /* invalid custom allocator */ + return NULL; + + mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem); + if (!mtctx) return NULL; + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + mtctx->cMem = cMem; + mtctx->allJobsCompleted = 1; + mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem); + mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem); + assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */ + mtctx->jobIDMask = nbJobs - 1; + mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem); + mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem); + initError = ZSTDMT_serialState_init(&mtctx->serial); + mtctx->roundBuff = kNullRoundBuff; + if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) { + ZSTDMT_freeCCtx(mtctx); + return NULL; + } + DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers); + return mtctx; +} + +ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers) +{ + return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem); +} + + +/* ZSTDMT_releaseAllJobResources() : + * note : ensure all workers are killed first !
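ZSTDMT_createCCtx_advanced() above sizes the job table to a power of two precisely so that ever-increasing job counters can be folded into table slots with jobIDMask, the idiom used throughout the rest of this file. A minimal demonstration of that ring indexing (illustrative only):

#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned const nbJobs = 8;                 /* table size : must be a power of 2 */
    unsigned const jobIDMask = nbJobs - 1;
    unsigned nextJobID;
    assert((nbJobs & (nbJobs - 1)) == 0);      /* same invariant asserted above */
    for (nextJobID = 0; nextJobID < 20; nextJobID++) {
        unsigned const slot = nextJobID & jobIDMask;   /* == nextJobID % nbJobs */
        assert(slot == nextJobID % nbJobs);
        printf("job %2u -> slot %u\n", nextJobID, slot);
    }
    return 0;
}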
*/ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cSize = 0; + } + memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_free(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + +/* Internal only */ +size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, + ZSTDMT_parameter parameter, unsigned value) { + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter"); + switch(parameter) + { + case ZSTDMT_p_jobSize : + DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value); + if ( (value > 0) /* value==0 => automatic job size */ + & (value < ZSTDMT_JOBSIZE_MIN) ) + value = ZSTDMT_JOBSIZE_MIN; + params->jobSize = value; + return value; + case ZSTDMT_p_overlapSectionLog : + if (value > 9) value = 9; + DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value); + params->overlapSizeLog = (value >= 9) ? 
9 : value; + return value; + default : + return ERROR(parameter_unsupported); + } +} + +size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value) +{ + DEBUGLOG(4, "ZSTDMT_setMTCtxParameter"); + switch(parameter) + { + case ZSTDMT_p_jobSize : + return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); + case ZSTDMT_p_overlapSectionLog : + return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); + default : + return ERROR(parameter_unsupported); + } +} + +/* Sets parameters relevant to the compression job, + * initializing others to default values. */ +static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) +{ + ZSTD_CCtx_params jobParams; + memset(&jobParams, 0, sizeof(jobParams)); + + jobParams.cParams = params.cParams; + jobParams.fParams = params.fParams; + jobParams.compressionLevel = params.compressionLevel; + jobParams.disableLiteralCompression = params.disableLiteralCompression; + + return jobParams; +} + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getNbWorkers(): + * @return nb threads currently active in mtctx. + * mtctx must be valid */ +unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx) +{ + assert(mtctx != NULL); + return mtctx->params.nbWorkers; +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(6, "ZSTDMT_getFrameProgression"); + fps.consumed = mtctx->consumed; + fps.produced = mtctx->produced; + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady) + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex); + { size_t const cResult = mtctx->jobs[wJobID].cSize; + size_t const produced = ZSTD_isError(cResult) ? 
0 : cResult; + fps.consumed += mtctx->jobs[wJobID].consumed; + fps.ingested += mtctx->jobs[wJobID].src.size; + fps.produced += produced; + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) +{ + if (params.ldmParams.enableLdm) + return MAX(21, params.cParams.chainLog + 4); + return MAX(20, params.cParams.windowLog + 2); +} + +static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params) +{ + unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog; + if (params.ldmParams.enableLdm) + return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog); + return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog); +} + +static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) { + assert(nbWorkers>0); + { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); + size_t const jobMaxSize = jobSizeTarget << 2; + size_t const passSizeMax = jobMaxSize * nbWorkers; + unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1; + unsigned const nbJobsLarge = multiplier * nbWorkers; + unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1; + unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers); + return (multiplier>1) ? nbJobsLarge : nbJobsSmall; +} } + +/* ZSTDMT_compress_advanced_internal() : + * This is a blocking function : it will only give back control to caller after finishing its compression job. + */ +static size_t ZSTDMT_compress_advanced_internal( + ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params) +{ + ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); + size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params); + unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); + size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; + size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ + const char* const srcStart = (const char*)src; + size_t remainingSrcSize = srcSize; + unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? 
nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */ + size_t frameStartPos = 0, dstBufferPos = 0; + assert(jobParams.nbWorkers == 0); + assert(mtctx->cctxPool->totalCCtx == params.nbWorkers); + + params.jobSize = (U32)avgJobSize; + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ", + nbJobs, (U32)proposedJobSize, (U32)avgJobSize); + + if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */ + ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); + if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); + } + + assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) ); + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params)) + return ERROR(memory_allocation); + + if (nbJobs > mtctx->jobIDMask+1) { /* enlarge job table */ + U32 jobsTableSize = nbJobs; + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + mtctx->jobIDMask = 0; + mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem); + if (mtctx->jobs==NULL) return ERROR(memory_allocation); + assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0)); /* ensure jobsTableSize is a power of 2 */ + mtctx->jobIDMask = jobsTableSize - 1; + } + + { unsigned u; + for (u=0; u<nbJobs; u++) { + size_t const jobSize = MIN(remainingSrcSize, avgJobSize); + size_t const dstBufferCapacity = ZSTD_compressBound(jobSize); + buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity }; + buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer; + size_t dictSize = u ? overlapSize : 0; + + mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize; + mtctx->jobs[u].prefix.size = dictSize; + mtctx->jobs[u].src.start = srcStart + frameStartPos; + mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */ + mtctx->jobs[u].consumed = 0; + mtctx->jobs[u].cSize = 0; + mtctx->jobs[u].cdict = (u==0) ?
cdict : NULL; + mtctx->jobs[u].fullFrameSize = srcSize; + mtctx->jobs[u].params = jobParams; + /* do not calculate checksum within sections, but write it in header for first section */ + mtctx->jobs[u].dstBuff = dstBuffer; + mtctx->jobs[u].cctxPool = mtctx->cctxPool; + mtctx->jobs[u].bufPool = mtctx->bufPool; + mtctx->jobs[u].seqPool = mtctx->seqPool; + mtctx->jobs[u].serial = &mtctx->serial; + mtctx->jobs[u].jobID = u; + mtctx->jobs[u].firstJob = (u==0); + mtctx->jobs[u].lastJob = (u==nbJobs-1); + + DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize); + DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12); + POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]); + + frameStartPos += jobSize; + dstBufferPos += dstBufferCapacity; + remainingSrcSize -= jobSize; + } } + + /* collect result */ + { size_t error = 0, dstPos = 0; + unsigned jobID; + for (jobID=0; jobID<nbJobs; jobID++) { + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID); + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + DEBUGLOG(5, "ready to write job %u ", jobID); + + { size_t const cSize = mtctx->jobs[jobID].cSize; + if (ZSTD_isError(cSize)) error = cSize; + if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall); + if (jobID) { /* note : job 0 is written directly at dst, which is correct position */ + if (!error) + memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */ + if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */ + DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + } } + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cSize = 0; + dstPos += cSize ; + } + } /* for (jobID=0; jobID<nbJobs; jobID++) */ + + if (params.fParams.checksumFlag) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + if (dstPos + 4 > dstCapacity) { + error = ERROR(dstSize_tooSmall); + } else { + DEBUGLOG(4, "writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)dst + dstPos, checksum); + dstPos += 4; + } } + + if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos); + return error ? error : dstPos; + } +} + +size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters params, + unsigned overlapLog) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + cctxParams.overlapSizeLog = overlapLog; + return ZSTDMT_compress_advanced_internal(mtctx, + dst, dstCapacity, + src, srcSize, + cdict, cctxParams); +} + + +size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ?
9 : ZSTDMT_OVERLAPLOG_DEFAULT; + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); + params.fParams.contentSizeFlag = 1; + return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog); +} + + +/* ====================================== */ +/* ======= Streaming API ======= */ +/* ====================================== */ + +size_t ZSTDMT_initCStream_internal( + ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, ZSTD_CCtx_params params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)", + (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + assert(mtctx->cctxPool->totalCCtx == params.nbWorkers); + + /* init */ + if (params.jobSize == 0) { + params.jobSize = 1U << ZSTDMT_computeTargetJobLog(params); + } + if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; + + mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ + if (mtctx->singleBlockingThread) { + ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); + DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); + assert(singleThreadParams.nbWorkers == 0); + return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], + dict, dictSize, cdict, + singleThreadParams, pledgedSrcSize); + } + + DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); + + if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */ + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + mtctx->allJobsCompleted = 1; + } + + mtctx->params = params; + mtctx->frameContentSize = pledgedSrcSize; + if (dict) { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */ + params.cParams, mtctx->cMem); + mtctx->cdict = mtctx->cdictLocal; + if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation); + } else { + ZSTD_freeCDict(mtctx->cdictLocal); + mtctx->cdictLocal = NULL; + mtctx->cdict = cdict; + } + + mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params); + DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10)); + mtctx->targetSectionSize = params.jobSize; + if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN; + if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */ + DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize); + DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10)); + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize)); + { + /* If ldm is enabled we need windowSize space. */ + size_t const windowSize = mtctx->params.ldmParams.enableLdm ? 
(1U << mtctx->params.cParams.windowLog) : 0; + /* Two buffers of slack, plus extra space for the overlap + * This is the minimum slack that LDM works with. One extra because + * flush might waste up to targetSectionSize-1 bytes. Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. + */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params)) + return ERROR(memory_allocation); + return 0; +} + +size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ + DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize); + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL, + cctxParams, pledgedSrcSize); +} + +size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams = mtctx->params; + if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */ + cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict); + cctxParams.fParams = fParams; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict, + cctxParams, pledgedSrcSize); +} + + +/* ZSTDMT_resetCStream() : + * pledgedSrcSize can be zero == unknown (for the time being) + * prefer using ZSTD_CONTENTSIZE_UNKNOWN, + * as `0` might mean "empty" in the future */ +size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize) +{ + if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params, + pledgedSrcSize); +} + +size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) { + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); + ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ + DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel); + cctxParams.cParams = params.cParams; + cctxParams.fParams = params.fParams; + return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, 
ZSTD_CONTENTSIZE_UNKNOWN); +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled. + */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ?
mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag; + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { + mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . 
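The job-creation and flush machinery above is driven through the exported streaming calls declared in zstdmt_compress.h further down in this patch. A hedged usage sketch, not part of the imported sources: it assumes the internal header is on the include path, a library built with ZSTD_MULTITHREAD, and arbitrary buffer sizes and compression level.

#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <string.h>
#include "zstd.h"
#include "zstdmt_compress.h"

int main(void)
{
    static char src[1 << 16];
    static char dst[1 << 17];
    ZSTD_inBuffer  in  = { src, sizeof(src), 0 };
    ZSTD_outBuffer out = { dst, sizeof(dst), 0 };
    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(2);    /* 2 worker threads */
    if (mtctx == NULL) return 1;
    memset(src, 'z', sizeof(src));
    if (ZSTD_isError(ZSTDMT_initCStream(mtctx, 3))) return 1;
    while (in.pos < in.size) {                          /* feed all input */
        size_t const hint = ZSTDMT_compressStream(mtctx, &out, &in);
        if (ZSTD_isError(hint)) return 1;
    }
    for (;;) {                                          /* drain the frame */
        size_t const remaining = ZSTDMT_endStream(mtctx, &out);
        if (ZSTD_isError(remaining)) return 1;
        if (remaining == 0) break;
    }
    printf("compressed %zu -> %zu bytes\n", in.size, out.pos);
    ZSTDMT_freeCCtx(mtctx);
    return 0;
}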
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job completed */ + && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job %u completed (%u bytes), moving to 
next one", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. + */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. 
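ZSTDMT_isOverlapped(), defined next, is the standard half-open interval test: [a,b) and [c,d) intersect iff a < d and c < b, with empty intervals excluded up front. The same predicate over plain offsets, as a tiny self-check (illustrative only; the helper name is hypothetical):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

static int overlaps(size_t start1, size_t size1, size_t start2, size_t size2)
{
    if (size1 == 0 || size2 == 0) return 0;   /* empty ranges never overlap */
    return (start1 < start2 + size2) && (start2 < start1 + size1);
}

int main(void)
{
    assert( overlaps(0, 10, 5, 10));   /* [0,10) vs [5,15)  : intersect */
    assert(!overlaps(0, 10, 10, 5));   /* [0,10) vs [10,15) : merely touch */
    assert(!overlaps(3, 0, 0, 100));   /* empty range */
    printf("overlap checks passed\n");
    return 0;
}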
+ */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeStart = (BYTE const*)range.start; + BYTE const* const rangeEnd = rangeStart + range.size; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(6, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. + */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. 
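ZSTDMT_waitForLdmComplete() above shows the canonical condition-variable pattern: the predicate is re-tested in a while loop under the mutex, so spurious wakeups and early signals are both harmless. Reduced to its skeleton with a hypothetical ready flag (illustrative only, not part of the imported sources):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int ready = 0;

static void* producer(void* arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    ready = 1;                          /* publish the state change ... */
    pthread_cond_signal(&cond);         /* ... then wake the waiter */
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t th;
    pthread_create(&th, NULL, producer, NULL);
    pthread_mutex_lock(&lock);
    while (!ready)                      /* loop guards against spurious wakeups */
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);
    pthread_join(th, NULL);
    printf("predicate observed\n");
    return 0;
}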
+ */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(6, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(6, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ + return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); + } + + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { + /* current frame being ended. Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* single-pass shortcut (note : synchronous-mode) */ + if ( (mtctx->nextJobID == 0) /* just started */ + && (mtctx->inBuff.filled == 0) /* nothing buffered */ + && (!mtctx->jobReady) /* no job already created */ + && (endOp == ZSTD_e_end) /* end order */ + && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */ + size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx, + (char*)output->dst + output->pos, output->size - output->pos, + (const char*)input->src + input->pos, input->size - input->pos, + mtctx->cdict, mtctx->params); + if (ZSTD_isError(cSize)) return cSize; + input->pos = input->size; + output->pos += cSize; + mtctx->allJobsCompleted = 1; + mtctx->frameEnded = 1; + return 0; + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. 
+ */ + assert(mtctx->doneJobID != mtctx->nextJobID); + } + } + if (mtctx->inBuff.buffer.start != NULL) { + size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad); + input->pos += toLoad; + mtctx->inBuff.filled += toLoad; + forwardInputProgress = toLoad>0; + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) + endOp = ZSTD_e_flush; /* can't end now : not all input consumed */ + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + return remainingToFlush; + } +} + + +size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); + + /* recommended next input size : fill current input buffer */ + return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ +} + + +static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame) +{ + size_t const srcSize = mtctx->inBuff.filled; + DEBUGLOG(5, "ZSTDMT_flushStream_internal"); + + if ( mtctx->jobReady /* one job ready for a worker to pick up */ + || (srcSize > 0) /* still some data within input buffer */ + || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ + DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", + (U32)srcSize, (U32)endFrame); + CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); + } + + /* check if there is any data available to flush */ + return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame); +} + + +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) +{ + DEBUGLOG(5, "ZSTDMT_flushStream"); + if (mtctx->singleBlockingThread) + return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush); +} + +size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) +{ + DEBUGLOG(4, "ZSTDMT_endStream"); + if (mtctx->singleBlockingThread) + return ZSTD_endStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end); +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.h b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.h new file 
mode 100644 index 0000000..f79e3b4 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/compress/zstdmt_compress.h @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * Some methods are still exposed (ZSTDLIB_API), + * because it used to be the only way to invoke MT compression. + * Now, it's recommended to use ZSTD_compress_generic() instead. + * These methods will stop being exposed in a future version */ + +/* === Dependencies === */ +#include <stddef.h> /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ + + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); +ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem); +ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + + +/* === Simple one-pass compression function === */ + +ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + + + +/* === Streaming functions === */ + +ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); +ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN.
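A hedged sketch of the one-pass entry point declared above (ZSTDMT_compressCCtx): not part of the imported sources, it assumes the internal header is reachable and a ZSTD_MULTITHREAD build; the buffer sizes and level are arbitrary.

#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <string.h>
#include "zstd.h"
#include "zstdmt_compress.h"

int main(void)
{
    char src[4096];
    char dst[8192];                 /* comfortably above ZSTD_compressBound(4096) */
    size_t cSize;
    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4);   /* 4 worker threads */
    if (mtctx == NULL) return 1;
    memset(src, 'a', sizeof(src));
    cSize = ZSTDMT_compressCCtx(mtctx, dst, sizeof(dst), src, sizeof(src), 5);
    ZSTDMT_freeCCtx(mtctx);
    if (ZSTD_isError(cSize)) return 1;
    printf("compressed %zu -> %zu bytes\n", sizeof(src), cSize);
    return 0;
}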
Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ + +ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ + + +/* === Advanced functions and parameters === */ + +#ifndef ZSTDMT_JOBSIZE_MIN +# define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ +#endif + +ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters params, + unsigned overlapLog); + +ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ + ZSTD_parameters params, + unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ + +ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fparams, + unsigned long long pledgedSrcSize); /* note : zero means empty */ + +/* ZSTDMT_parameter : + * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ +typedef enum { + ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */ + ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */ +} ZSTDMT_parameter; + +/* ZSTDMT_setMTCtxParameter() : + * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. + * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ + * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value); + + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd.
Never invoke directly === + * ======================================================== */ + +size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value); + +/* ZSTDMT_CCtxParam_setNbWorkers() + * Set nbWorkers, and clamp it. + * Also reset jobSize and overlapLog */ +size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers); + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/* ZSTDMT_getNbWorkers(): + * @return nb threads currently active in mtctx. + * mtctx must be valid */ +unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx); + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +/*! ZSTDMT_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/decompress/huf_decompress.c b/c-blosc/internal-complibs/zstd-1.3.4/decompress/huf_decompress.c new file mode 100644 index 0000000..73f5c46 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/decompress/huf_decompress.c @@ -0,0 +1,1096 @@ +/* ****************************************************************** + Huffman decoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+   You can contact the author at :
+   - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include <string.h>     /* memcpy, memset */
+#include "bitstream.h"  /* BIT_* */
+#include "compiler.h"
+#include "fse.h"        /* header compression */
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#include "error_private.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+
+
+/* **************************************************************
+*  Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+{
+    DTableDesc dtd;
+    memcpy(&dtd, table, sizeof(dtd));
+    return dtd;
+}
+
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+{
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+
+    U32* rankVal;
+    BYTE* huffWeight;
+    size_t spaceUsed32 = 0;
+
+    rankVal = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+    HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* Table header */
+    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
+        dtd.tableType = 0;
+        dtd.tableLog = (BYTE)tableLog;
+        memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Calculate starting value for each rank */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<tableLog+1; n++) {
+            U32 const current = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = current;
+    }   }
+
+    /* fill DTable */
+    {   U32 n;
+        for (n=0; n<nbSymbols; n++) {
+            U32 const w = huffWeight[n];
+            U32 const length = (1 << w) >> 1;
+            U32 u;
+            HUF_DEltX2 D;
+            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+            for (u = rankVal[w]; u < rankVal[w] + length; u++)
+                dt[u] = D;
+            rankVal[w] += length;
+    }   }
+
+    return iSize;
+}
+
+size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_readDTableX2_wksp(DTable, src, srcSize,
+                                 workSpace, sizeof(workSpace));
+}
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)  \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* [0-3] symbols remaining */
+    if (MEM_32bits())
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const void* dtPtr = DTable + 1;
+    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+    BIT_DStream_t bitD;
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void*
const dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = BIT_DStream_unfinished; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + BIT_reloadDStream(&bitD1); + BIT_reloadDStream(&bitD2); + BIT_reloadDStream(&bitD3); + BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. 
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX4* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_1(p, bitDPtr); + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X4_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX4* const dt = (const 
HUF_DEltX4*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X4_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; + const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_1(op1, &bitD1); + HUF_DECODE_SYMBOLX4_1(op2, &bitD2); + HUF_DECODE_SYMBOLX4_1(op3, &bitD3); + HUF_DECODE_SYMBOLX4_1(op4, &bitD4); + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_0(op1, &bitD1); + HUF_DECODE_SYMBOLX4_0(op2, &bitD2); + HUF_DECODE_SYMBOLX4_0(op3, &bitD3); + HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const 
endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); +#if DYNAMIC_BMI2 + +#define X(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define X(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + +X(HUF_decompress1X2_usingDTable_internal) +X(HUF_decompress4X2_usingDTable_internal) +X(HUF_decompress1X4_usingDTable_internal) +X(HUF_decompress4X4_usingDTable_internal) + +#undef X + + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize, + 
+                                                workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+}
+
+
+size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
+/* HUF_fillDTableX4Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    {   U32 s;
+        for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+            const U32 symbol = sortedSymbols[s].symbol;
+            const U32 weight = sortedSymbols[s].weight;
+            const U32 nbBits = nbBitsBaseline - weight;
+            const U32 length = 1 << (sizeLog-nbBits);
+            const U32 start = rankVal[weight];
+            U32 i = start;
+            const U32 end = start + length;
+
+            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+            DElt.nbBits   = (BYTE)(nbBits + consumed);
+            DElt.length   = 2;
+            do { DTable[i++] = DElt; } while (i < end);   /* since length >= 1 */
+
+            rankVal[weight] += length;
+    }   }
+}
+
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        } else {
+            HUF_DEltX4 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
+                             size_t srcSize, void* workSpace,
+                             size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
+    U32 *rankStart;
+
+    rankValCol_t* rankVal;
+    U32* rankStats;
+    U32* rankStart0;
+    sortedSymbol_t* sortedSymbol;
+    BYTE* weightList;
+    size_t spaceUsed32 = 0;
+
+    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+    rankStats = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 1;
+    rankStart0 = (U32 *)workSpace + spaceUsed32;
+    spaceUsed32 += HUF_TABLELOG_MAX + 2;
+    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
+    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
+    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+    rankStart = rankStart0 + 1;
+    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all rankSize_0 symbols at the end of sorted list */
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = weightList[s];
+            U32 const r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 current = nextRankVal;
+                nextRankVal += rankStats[w] << (w+rescale);
+                rankVal0[w] = current;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUF_fillDTableX4(dt, maxTableLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_readDTableX4_wksp(DTable, src, srcSize,
+                                 workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
+                                               workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+{
+    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
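+    /* Convenience one-shot entry point : a maximum-size double-symbol
+     * DTable is declared on the stack, then decoding is delegated to
+     * HUF_decompress1X4_DCtx() above. (descriptive comment, not upstream) */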
HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + +size_t HUF_decompress4X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + + +/* ********************************/ +/* Generic decompression selector */ +/* ********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? 
HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + + +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128 KB); + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; +} } + + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 }; + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? 
HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): + HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); + } +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +} + +size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? 
HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + } +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/decompress/zstd_decompress.c b/c-blosc/internal-complibs/zstd-1.3.4/decompress/zstd_decompress.c new file mode 100644 index 0000000..3ec6a1c --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/decompress/zstd_decompress.c @@ -0,0 +1,3003 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). 
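+ * For example (illustrative values) : a frame produced with a 1 GB window
+ * is rejected by default, and needs something like
+ *   ZSTD_DCtx_setMaxWindowSize(dctx, (size_t)1 << 30);
+ * before streaming decompression will accept it.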
+ */
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+#  define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include <string.h>      /* memcpy, memmove, memset */
+#include "cpu.h"
+#include "mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#include "zstd_internal.h"
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
+#  include "zstd_legacy.h"
+#endif
+
+
+/*-*************************************
+*  Errors
+***************************************/
+#define ZSTD_isError ERR_isError   /* for inlining */
+#define FSE_isError  ERR_isError
+#define HUF_isError  ERR_isError
+
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
+
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+
+typedef struct {
+    U32 fastMode;
+    U32 tableLog;
+} ZSTD_seqSymbol_header;
+
+typedef struct {
+    U16  nextState;
+    BYTE nbAdditionalBits;
+    BYTE nbBits;
+    U32  baseValue;
+} ZSTD_seqSymbol;
+
+#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
+
+typedef struct {
+    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];
+    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];
+    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_entropyDTables_t;
+
+struct ZSTD_DCtx_s
+{
+    const ZSTD_seqSymbol* LLTptr;
+    const ZSTD_seqSymbol* MLTptr;
+    const ZSTD_seqSymbol* OFTptr;
+    const HUF_DTable* HUFptr;
+    ZSTD_entropyDTables_t entropy;
+    const void* previousDstEnd;   /* detect continuity */
+    const void* base;             /* start of current segment */
+    const void* vBase;            /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
+    size_t expected;
+    ZSTD_frameHeader fParams;
+    U64 decodedSize;
+    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
+    ZSTD_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    XXH64_state_t xxhState;
+    size_t headerSize;
+    U32 dictID;
+    ZSTD_format_e format;
+    const BYTE* litPtr;
+    ZSTD_customMem customMem;
+    size_t litSize;
+    size_t rleSize;
+    size_t staticSize;
+    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime.
*/ + + /* streaming */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ? + ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : + ZSTD_frameHeaderSize_prefix; + ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ + dctx->staticSize = 0; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + if (dctx->staticSize) return ERROR(memory_allocation); /* not compatible with static DCtx */ + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + ZSTD_free(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + ZSTD_free(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + + 
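+/* Illustrative sketch (not part of upstream zstd) : typical lifecycle of a
+ * decompression context using the public zstd.h entry points wrapped above.
+ * The helper name is hypothetical and error handling is deliberately
+ * minimal; guarded out so the patch content stays inert. */
+#if 0   /* example only */
+static size_t example_decompress_once(void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize)
+{
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();   /* heap-allocated context */
+    size_t dSize;
+    if (dctx == NULL) return 0;                  /* allocation failure */
+    dSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+    ZSTD_freeDCtx(dctx);                         /* safe on both paths */
+    return ZSTD_isError(dSize) ? 0 : dSize;
+}
+#endif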
+/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_frameIdSize) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + if (srcSize < minInputSize) return ERROR(srcSize_wrong); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_internal() : + * decode Frame Header, or require larger `srcSize`. 
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + if (srcSize < minInputSize) return minInputSize; + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_skippableHeaderSize) + return ZSTD_skippableHeaderSize; /* magic number + frame length */ + memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_frameIdSize); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + return ERROR(prefix_unknown); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + if ((fhdByte & 0x08) != 0) + return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */ + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + if (windowLog > ZSTD_WINDOWLOG_MAX) + return ERROR(frameParameter_windowTooLarge); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. 
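+ * note 2 : if the @return value below is >0, a typical caller gathers at
+ *          least that many input bytes and calls again, looping until it
+ *          gets 0 or an error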
+ * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_internal(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_frameHeaderSize_prefix) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t skippableSize; + if (srcSize < ZSTD_skippableHeaderSize) + return ERROR(srcSize_wrong); + skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_frameIdSize) + + ZSTD_skippableHeaderSize; + if (srcSize < skippableSize) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : +* compatible with legacy mode +* @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : +* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). 
+* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_internal(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ + if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) + return ERROR(dictionary_wrong); + if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + return 0; +} + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : +* Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected); + return cSize; + } +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + size_t regenSize) +{ + if (srcSize != 1) return ERROR(srcSize_wrong); + if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, *(const BYTE*)src, regenSize); + return regenSize; +} + +/*! ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); + /* fall-through */ + case set_compressed: + if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + (istart[4] << 10); + break; + } + if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); + if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + + if (HUF_isError((litEncType==set_repeat) ? + ( singleStream ? 
+ HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) : + HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) : + ( singleStream ? + HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) : + HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2)))) + return ERROR(corruption_detected); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + if (litSize+lhSize > srcSize) return ERROR(corruption_detected); + memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ + break; + } + if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); + memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + return ERROR(corruption_detected); /* impossible */ + } + } +} + +/* Default FSE distribution tables. 
+ * These are pre-calculated FSE decoding tables using default distributions as defined in the specification : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with the following method : + * - start from the default distributions, present in /lib/common/zstd_internal.h + * - generate the tables normally, using ZSTD_buildFSETable() + * - print out the content of the tables + * - prettify the output, reported below, and test with a fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = { + /* header entry + 64 pre-computed entries, lost from this copy; see lib/decompress/zstd_decompress.c in upstream zstd 1.3.4 */ +}; + +/* Default FSE distribution table for Offset Codes */ +static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = { + /* header entry + 32 pre-computed entries, lost from this copy */ +}; + +/* Default FSE distribution table for Match Lengths */ +static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = { + /* header entry + 64 pre-computed entries, lost from this copy */ +}; + +static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits) +{ + void* ptr = dt; + ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr; + ZSTD_seqSymbol* const cell = dt + 1; + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) */ +static void +ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U16 symbolNext[MaxSeq+1]; + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s<maxSV1; s++) { + if (normalizedCounter[s]==-1) { + tableDecode[highThreshold--].baseValue = s; + symbolNext[s] = 1; + } else { + if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s<maxSV1; s++) { + int i; + for (i=0; i<normalizedCounter[s]; i++) { + tableDecode[position].baseValue = s; + position = (position + step) & tableMask; + while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; u<tableSize; u++) { + U32 const symbol = tableDecode[u].baseValue; + U32 const nextState = symbolNext[symbol]++; + tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); + tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); + assert(nbAdditionalBits[symbol] < 255); + tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol]; + tableDecode[u].baseValue = baseValue[symbol]; + } } +} + + +/*! ZSTD_buildSeqTable() : + * @return : nb bytes read from src, + * or an error code if it fails */ +static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, + symbolEncodingType_e type, U32 max, U32 maxLog, + const void* src, size_t srcSize, + const U32* baseValue, const U32* nbAdditionalBits, + const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable) +{ + switch(type) + { + case set_rle : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + if (!flagRepeatTable) return ERROR(corruption_detected); + return 0; + case set_compressed : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + *DTablePtr = DTableSpace; + return headerSize; + } + default : /* impossible */ + assert(0); + return ERROR(GENERIC); + } +} + +static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD
}; + +static const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); + + /* SeqHead */ + { int nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + } else { + if (ip >= iend) return ERROR(srcSize_wrong); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + } + + /* FSE table descriptors */ + if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy); + if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + + +FORCE_NOINLINE +size_t ZSTD_execSequenceLast7(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 
WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */ + + /* copy literals */ + if (op < oend_w) { + ZSTD_wildcopy(op, *litPtr, oend_w - op); + *litPtr += oend_w - op; + op = oend_w; + } + while (op < oLitEnd) *op++ = *(*litPtr)++; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } } + while (op < oMatchEnd) *op++ = *match++; + return sequenceLength; +} + + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix -> go into extDict */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) + return ERROR(corruption_detected); + match = dictEnd + (match - base); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, 
match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + + +HINT_INLINE +size_t ZSTD_execSequenceLong(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = sequence.match; + + /* check */ + if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + if (op > oend_w || sequence.matchLength < MINMATCH) { + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; + return sequenceLength; + } + } } + assert(op <= oend_w); + assert(sequence.matchLength >= MINMATCH); + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = 
BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { /* offset == 0 */ + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + + ((mlBits>0) ?
BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const base = (const BYTE*) (dctx->base); + const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences"); + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + nbSeq--; + { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq); + if (nbSeq) return ERROR(corruption_detected); + /* save reps for next block */ + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + + +
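[Editor's note] The `_body`/`_default` pair above, together with the `TARGET_ATTRIBUTE("bmi2")` copies and the `dctx->bmi2` dispatchers that appear further below, form one recurring pattern in this file: a single force-inlined body is compiled more than once under different per-function target attributes, and a flag cached in the context selects the specialized copy at run time. The following stand-alone sketch only illustrates that pattern under stated assumptions — `work_body`, `work`, and the stubbed `cpu_has_bmi2()` probe are hypothetical names, not zstd code (zstd derives its real flag from cpuid when the DCtx is created):

    #include <stddef.h>

    /* Stubbed feature probe; a real implementation would query cpuid once
     * and cache the result, as zstd does at context creation. */
    static int cpu_has_bmi2(void) { return 0; }

    /* Shared body: forced inline so each wrapper gets its own copy. */
    static inline size_t work_body(const unsigned char* src, size_t n)
    {
        size_t acc = 0, i;
        for (i = 0; i < n; i++) acc += src[i];  /* placeholder workload */
        return acc;
    }

    /* Baseline copy, compiled with the translation unit's default flags. */
    static size_t work_default(const unsigned char* src, size_t n)
    {
        return work_body(src, n);
    }

    #if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
    /* Same body, recompiled with BMI2 enabled for this one function only. */
    __attribute__((target("bmi2")))
    static size_t work_bmi2(const unsigned char* src, size_t n)
    {
        return work_body(src, n);
    }
    #endif

    /* Runtime dispatch: one predictable branch selects the right copy. */
    size_t work(const unsigned char* src, size_t n)
    {
    #if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
        if (cpu_has_bmi2()) return work_bmi2(src, n);
    #endif
        return work_default(src, n);
    }

Because the probe runs once and the result is cached, the specialized code costs a single branch per call while the binary still runs on CPUs without BMI2.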
+FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there are enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + { size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+ * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->base); + const BYTE* const dictStart = (const BYTE*) (dctx->vBase); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STOSEQ_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { + sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset); + } + if (seqNb<seqAdvance) return ERROR(corruption_detected); + + /* decode and decompress */ + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { + seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset); + size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + sequences[seqNb & STOSEQ_MASK] = sequence; + op += oneSeqSize; + } + if (seqNb<nbSeq) return ERROR(corruption_detected); + + /* finish queue */ + seqNb -= seqAdvance; + for ( ; seqNb<nbSeq ; seqNb++) { + size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + + /* save reps for next block */ + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } +#undef STORED_SEQS +#undef STOSEQ_MASK +#undef ADVANCED_SEQS + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +#endif + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, + const void *seqStart, size_t seqSize, int nbSeq, + const
ZSTD_longOffset_e isLongOffset); + +static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1<<OffFSELog) */ +static unsigned ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) +{ + const void* ptr = offTable; + U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; + const ZSTD_seqSymbol* table = offTable + 1; + U32 const max = 1 << tableLog; + U32 u, total = 0; + + assert(max <= (1 << OffFSELog)); /* max not too large */ + for (u=0; u<max; u++) { + if (table[u].nbAdditionalBits > 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} + + +static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + if ( (!frame || dctx->fParams.windowSize > (1<<24)) + && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ?
7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + if (shareLongOffsets >= minShare) + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + } + + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + } +} + + +static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dst; + dctx->previousDstEnd = dst; + } +} + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} + + +/** ZSTD_insertBlock() : + insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ +ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + ZSTD_checkContinuity(dctx, blockStart); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length) +{ + if (length > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, byte, length); + return length; +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameCompressedSizeLegacy(src, srcSize); +#endif + if ( (srcSize >= ZSTD_skippableHeaderSize) + && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) { + return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize); + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) return ret; + if (ret > 0) return ERROR(srcSize_wrong); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Loop on each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ERROR(srcSize_wrong); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + + if (blockProperties.lastBlock) break; + } + + if (zfh.checksumFlag) { /* Final frame content checksum */ + if (remainingSize < 4) return ERROR(srcSize_wrong); + ip += 4; + remainingSize -= 4; + } + + return ip - ipstart; + } +} + +/*! 
ZSTD_decompressFrame() : +* @dctx must be properly initialized */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* ip = (const BYTE*)(*srcPtr); + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t remainingSize = *srcSizePtr; + + /* check */ + if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) + return ERROR(srcSize_wrong); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize) + return ERROR(srcSize_wrong); + CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + ip += frameHeaderSize; remainingSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + return ERROR(corruption_detected); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->fParams.checksumFlag) + XXH64_update(&dctx->xxhState, op, decodedSize); + op += decodedSize; + ip += cBlockSize; + remainingSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + if ((U64)(op-ostart) != dctx->fParams.frameContentSize) { + return ERROR(corruption_detected); + } } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + if (remainingSize<4) return ERROR(checksum_wrong); + checkRead = MEM_readLE32(ip); + if (checkRead != checkCalc) return ERROR(checksum_wrong); + ip += 4; + remainingSize -= 4; + } + + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSize; + return op-ostart; +} + +static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict); +static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict); + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDictDictContent(ddict); + dictSize = ZSTD_DDictDictSize(ddict); + } + + while (srcSize >= ZSTD_frameHeaderSize_prefix) { + U32 magicNumber; + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + /* legacy support is not compatible with static dctx */ + if (dctx->staticSize) return ERROR(memory_allocation); + + 
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); + if (magicNumber != ZSTD_MAGICNUMBER) { + if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t skippableSize; + if (srcSize < ZSTD_skippableHeaderSize) + return ERROR(srcSize_wrong); + skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize) + + ZSTD_skippableHeaderSize; + if (srcSize < skippableSize) return ERROR(srcSize_wrong); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + return ERROR(prefix_unknown); + } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + } + ZSTD_checkContinuity(dctx, dst); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + if (ZSTD_isError(res)) return res; + /* no need to bound check, ZSTD_decompressFrame already has */ + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */ + + return (BYTE*)dst - (BYTE*)dststart; +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + if (dctx==NULL) return ERROR(memory_allocation); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * 
srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize); + /* Sanity check */ + if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */ + if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_frameIdSize); /* to read skippable magic number */ + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ?
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + break; + case bt_raw : + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); + break; + case bt_reserved : /* should never happen */ + default: + return ERROR(corruption_detected); + } + if (ZSTD_isError(rSize)) return rSize; + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize); + dctx->decodedSize += rSize; + if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize); + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + if (dctx->decodedSize != dctx->fParams.frameContentSize) { + return ERROR(corruption_detected); + } } + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + dctx->previousDstEnd = (char*)dst + rSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32); + if (check32 != h32) return ERROR(checksum_wrong); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_skippableHeaderSize); + memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_frameIdSize); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + return ERROR(GENERIC); /* impossible */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; + return 0; +} + +/* ZSTD_loadEntropy() : + * dict : must point at beginning of a valid zstd dictionary + * @return : size of entropy tables read */ +static size_t 
ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + if (dictSize <= 8) return ERROR(dictionary_corrupted); + dictPtr += 8; /* skip header = magic + dictID */ + + + { size_t const hSize = HUF_readDTableX4_wksp( + entropy->hufTable, dictPtr, dictEnd - dictPtr, + entropy->workspace, sizeof(entropy->workspace)); + if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + U32 offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + ZSTD_buildFSETable(entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + ZSTD_buildFSETable(entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + ZSTD_buildFSETable(entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog); + dictPtr += litlengthHeaderSize; + } + + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted); + entropy->rep[i] = rep; + } } + + return dictPtr - (const BYTE*)dict; +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_frameIdSize); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); + if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +/* Note : this function cannot fail */ +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != 
NULL); + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + dctx->vBase = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + CHECK_F( ZSTD_decompressBegin(dctx) ); + if (dict && dictSize) + CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict) +{ + return ddict->dictContent; +} + +static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict) +{ + return ddict->dictSize; +} + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict) +{ + CHECK_F( ZSTD_decompressBegin(dstDCtx) ); + if (ddict) { /* support begin on NULL */ + dstDCtx->dictID = ddict->dictID; + dstDCtx->base = ddict->dictContent; + dstDCtx->vBase = ddict->dictContent; + dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dstDCtx->previousDstEnd = dstDCtx->dictEnd; + if (ddict->entropyPresent) { + dstDCtx->litEntropy = 1; + dstDCtx->fseEntropy = 1; + dstDCtx->LLTptr = ddict->entropy.LLTable; + dstDCtx->MLTptr = ddict->entropy.MLTable; + dstDCtx->OFTptr = ddict->entropy.OFTable; + dstDCtx->HUFptr = ddict->entropy.hufTable; + dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; + dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; + dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dstDCtx->litEntropy = 0; + dstDCtx->fseEntropy = 0; + } + } + return 0; +} + +static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize); + + /* load entropy tables */ + CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted ); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if 
((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + } else { + void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) ); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + if (!ddict) return NULL; + ddict->cMem = customMem; + + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) { + ZSTD_freeDDict(ddict); + return NULL; + } + + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = + sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)workspace; + assert(workspace != NULL); + assert(dict != NULL); + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_free(ddict->dictBuffer, cMem); + ZSTD_free(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. 
+ * Note : dictionaries created by reference, using ZSTD_dlm_byRef, are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within the dictionary. + * If @return == 0, the dictionary is not conformant with the Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_frameIdSize); +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to the Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could be for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * The needed dictionary is then hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary. +* Use a dictionary without significant overhead.
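+* A minimal call sequence, as an illustrative sketch (buffer names are placeholders, error paths omitted) : +* ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize); +* ZSTD_DCtx* const dctx = ZSTD_createDCtx(); +* size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict); +* then test dSize with ZSTD_isError(), and release resources with ZSTD_freeDCtx() and ZSTD_freeDDict().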
*/ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + ZSTD_freeDDict(dctx->ddictLocal); + if (dict && dictSize >= 8) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + if (dctx->ddictLocal == NULL) return ERROR(memory_allocation); + } else { + dctx->ddictLocal = NULL; + } + dctx->ddict = dctx->ddictLocal; + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType); +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_frameHeaderSize_prefix. 
+ * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + zds->streamStage = zdss_init; + CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + return ZSTD_frameHeaderSize_prefix; +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDict(zds, NULL, 0); +} + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + dctx->ddict = ddict; + return 0; +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + size_t const initResult = ZSTD_initDStream(dctx); + dctx->ddict = ddict; + return initResult; +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_frameHeaderSize_prefix. + * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + DEBUGLOG(4, "ZSTD_resetDStream"); + dctx->streamStage = zdss_loadHeader; + dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0; + dctx->legacyVersion = 0; + dctx->hostageByte = 0; + return ZSTD_frameHeaderSize_prefix; +} + +size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx, + ZSTD_DStreamParameter_e paramType, unsigned paramValue) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + switch(paramType) + { + default : return ERROR(parameter_unsupported); + case DStream_p_maxWindowSize : + DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10); + dctx->maxWindowSize = paramValue ? 
paramValue : (U32)(-1); + break; + } + return 0; +} + +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format); + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + dctx->format = format; + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + if (err>0) return ERROR(srcSize_wrong); + if (zfh.windowSize > windowSizeMax) + return ERROR(frameParameter_windowTooLarge); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const istart = (const char*)(input->src) + input->pos; + const char* const iend = (const char*)(input->src) + input->size; + const char* ip = istart; + char* const ostart = (char*)(output->dst) + output->pos; + char* const oend = (char*)(output->dst) + output->size; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + if (input->pos > input->size) { /* forbidden */ + DEBUGLOG(5, "in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + return ERROR(srcSize_wrong); + } + if (output->pos > output->size) { /* forbidden */ + DEBUGLOG(5, "out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + return ERROR(dstSize_tooSmall); + } + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + /* legacy support is incompatible with static dctx */ + if 
(zds->staticSize) return ERROR(memory_allocation); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL; + size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + /* legacy support is incompatible with static dctx */ + if (zds->staticSize) return ERROR(memory_allocation); + CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize)); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict)); + + if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_frameIdSize); + zds->stage = ZSTDds_skipFrame; + } else { + CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << 
ZSTD_WINDOWLOG_ABSOLUTEMIN); + if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize); + if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx)) + return ERROR(memory_allocation); + } else { + ZSTD_free(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); + if (zds->inBuff == NULL) return ERROR(memory_allocation); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, (isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart), + ip, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + ip += neededInSize; + if (!decodedSize && !isSkipFrame) break; /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + { size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, + zds->inBuff, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + zds->inPos = 0; /* input is consumed */ + if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + } } + zds->streamStage = zdss_flush; + /* fall-through */ + + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: return ERROR(GENERIC); /* impossible */ + } } + + /* result */ + input->pos += (size_t)(ip-istart); + output->pos += (size_t)(op-ostart); + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + + +size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + return ZSTD_decompressStream(dctx, output, input); +} + 
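+/* Illustrative usage sketch (not upstream code) : a minimal loop driving ZSTD_decompressStream() to decode one complete frame into a sufficiently large destination buffer. Names are placeholders, error handling is reduced to early exits, and the block is kept under '#if 0' so it is never compiled. */ +#if 0 +static size_t decompress_one_frame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_DStream* const zds = ZSTD_createDStream(); + ZSTD_inBuffer input = { src, srcSize, 0 }; + ZSTD_outBuffer output = { dst, dstCapacity, 0 }; + size_t ret; + if (zds == NULL) return ERROR(memory_allocation); + ret = ZSTD_initDStream(zds); /* returns a size hint; this variant cannot fail */ + while (input.pos < input.size) { + ret = ZSTD_decompressStream(zds, &output, &input); + if (ZSTD_isError(ret)) break; /* decoding error */ + if (ret == 0) break; /* frame fully decoded and flushed */ + } + ZSTD_freeDStream(zds); + return ZSTD_isError(ret) ? ret : output.pos; /* nb of bytes written into dst */ +} +#endif +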
+size_t ZSTD_decompress_generic_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompress_generic(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +void ZSTD_DCtx_reset(ZSTD_DCtx* dctx) +{ + (void)ZSTD_initDStream(dctx); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff.h b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff.h new file mode 100644 index 0000000..a93115d --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* *************************************************************** +* NOTES/WARNINGS +******************************************************************/ +/* The streaming API defined here is deprecated. + * Consider migrating towards the ZSTD_compressStream() API in `zstd.h`. + * See 'lib/README.md'. + *****************************************************************/ + + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_BUFFERED_H_23987 +#define ZSTD_BUFFERED_H_23987 + +/* ************************************* +* Dependencies +***************************************/ +#include <stddef.h> /* size_t */ +#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */ + + +/* *************************************************************** +* Compiler specifics +*****************************************************************/ +/* Deprecation warnings */ +/* Should these warnings be a problem, + it is generally possible to disable them, + typically with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual.
+ Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API +# elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ >= 3) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZBUFF_DEPRECATED for this compiler") +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API +# endif +#endif /* ZBUFF_DISABLE_DEPRECATE_WARNINGS */ + + +/* ************************************* +* Streaming functions +***************************************/ +/* This is the easier "buffered" streaming API, +* using an internal buffer to lift all restrictions on user-provided buffers +* which can be any size, any place, for both input and output. +* ZBUFF and ZSTD are 100% interoperable, +* frames created by one can be decoded by the other one */ + +typedef ZSTD_CStream ZBUFF_CCtx; +ZBUFF_DEPRECATED("use ZSTD_createCStream") ZBUFF_CCtx* ZBUFF_createCCtx(void); +ZBUFF_DEPRECATED("use ZSTD_freeCStream") size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); + +ZBUFF_DEPRECATED("use ZSTD_initCStream") size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); +ZBUFF_DEPRECATED("use ZSTD_initCStream_usingDict") size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); + +ZBUFF_DEPRECATED("use ZSTD_compressStream") size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr); +ZBUFF_DEPRECATED("use ZSTD_flushStream") size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); +ZBUFF_DEPRECATED("use ZSTD_endStream") size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); + +/*-************************************************* +* Streaming compression - howto +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Start by initializing the ZBUFF_CCtx. +* Use ZBUFF_compressInit() to start a new compression operation. +* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary. +* +* Use ZBUFF_compressContinue() repetitively to consume the input stream. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present the remaining data again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* At any moment, it's possible to flush whatever data remains within the buffer, using ZBUFF_compressFlush().
+* The nb of bytes written into `dst` will be reported within *dstCapacityPtr. +* Note that the function cannot output more than *dstCapacityPtr, +* therefore, some content might still be left in the internal buffer if *dstCapacityPtr is too small. +* @return : nb of bytes still present in the internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write the frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* In that case, call ZBUFF_compressFlush() again to complete the flush. +* @return : nb of bytes still present in the internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize() +* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency) +* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. +* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering. +* **************************************************/ + + +typedef ZSTD_DStream ZBUFF_DCtx; +ZBUFF_DEPRECATED("use ZSTD_createDStream") ZBUFF_DCtx* ZBUFF_createDCtx(void); +ZBUFF_DEPRECATED("use ZSTD_freeDStream") size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx); + +ZBUFF_DEPRECATED("use ZSTD_initDStream") size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx); +ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize); + +ZBUFF_DEPRECATED("use ZSTD_decompressStream") size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFF_DCtx object is required to track streaming operations. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation, +* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFF_DCtx objects can be re-initialized multiple times. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : 0 when a frame is completely decoded and fully flushed, +* 1 when there is still some data left within internal buffer to flush, +* >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency), +* or an error code, which can be tested using ZBUFF_isError().
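+* +* An illustrative driving loop, as a sketch only (placeholder names; chunked input refills and the '>1' size hint are ignored for brevity) : +* size_t r = 1; +* while (r != 0) { +* size_t dCap = dstBufferCapacity, sSize = srcBytesLeft; +* r = ZBUFF_decompressContinue(dctx, dstBuffer, &dCap, srcPtr, &sSize); +* if (ZBUFF_isError(r)) return r; +* on return, dCap holds the bytes written and sSize the bytes consumed : flush dCap output bytes, then advance srcPtr by sSize. +* }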
+* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize() +* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFF_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZBUFF_DEPRECATED("use ZSTD_isError") unsigned ZBUFF_isError(size_t errorCode); +ZBUFF_DEPRECATED("use ZSTD_getErrorName") const char* ZBUFF_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZBUFF_DEPRECATED("use ZSTD_CStreamInSize") size_t ZBUFF_recommendedCInSize(void); +ZBUFF_DEPRECATED("use ZSTD_CStreamOutSize") size_t ZBUFF_recommendedCOutSize(void); +ZBUFF_DEPRECATED("use ZSTD_DStreamInSize") size_t ZBUFF_recommendedDInSize(void); +ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(void); + +#endif /* ZSTD_BUFFERED_H_23987 */ + + +#ifdef ZBUFF_STATIC_LINKING_ONLY +#ifndef ZBUFF_STATIC_H_30298098432 +#define ZBUFF_STATIC_H_30298098432 + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used in association with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +/*--- Dependency ---*/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */ +#include "zstd.h" + + +/*--- Custom memory allocator ---*/ +/*! ZBUFF_createCCtx_advanced() : + * Create a ZBUFF compression context using external alloc and free functions */ +ZBUFF_DEPRECATED("use ZSTD_createCStream_advanced") ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem); + +/*! ZBUFF_createDCtx_advanced() : + * Create a ZBUFF decompression context using external alloc and free functions */ +ZBUFF_DEPRECATED("use ZSTD_createDStream_advanced") ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem); + + +/*--- Advanced Streaming Initialization ---*/ +ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize); + + +#endif /* ZBUFF_STATIC_H_30298098432 */ +#endif /* ZBUFF_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_common.c b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_common.c new file mode 100644 index 0000000..661b9b0 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_common.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +/*-************************************* +* Dependencies +***************************************/ +#include "error_private.h" +#include "zbuff.h" + +/*-**************************************** +* ZBUFF Error Management (deprecated) +******************************************/ + +/*! ZBUFF_isError() : +* tells if a return value is an error code */ +unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } +/*! ZBUFF_getErrorName() : +* provides error code string from function result (useful for debugging) */ +const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } diff --git a/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_compress.c b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_compress.c new file mode 100644 index 0000000..f39c60d --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_compress.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************* +* Dependencies +***************************************/ +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" + + +/*-*********************************************************** +* Streaming compression +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* Use ZBUFF_compressInit() to start a new compression operation. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Use ZBUFF_compressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to call the function again with the remaining input. +* The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. +* Note that it will not output more than *dstCapacityPtr. +* Therefore, some content might still be left in its internal buffer if the dst buffer is too small. +* @return : nb of bytes still present in the internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write the frame epilogue. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* @return : nb of bytes still present in the internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) +* input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
+* output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed. +* ***********************************************************/ + +ZBUFF_CCtx* ZBUFF_createCCtx(void) +{ + return ZSTD_createCStream(); +} + +ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createCStream_advanced(customMem); +} + +size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) +{ + return ZSTD_freeCStream(zbc); +} + + +/* ====== Initialization ====== */ + +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */ + return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize); +} + + +size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) +{ + return ZSTD_initCStream_usingDict(zbc, dict, dictSize, compressionLevel); +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZSTD_initCStream(zbc, compressionLevel); +} + +/* ====== Compression ====== */ + + +size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + ZSTD_inBuffer inBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + inBuff.src = src; + inBuff.pos = 0; + inBuff.size = *srcSizePtr; + result = ZSTD_compressStream(zbc, &outBuff, &inBuff); + *dstCapacityPtr = outBuff.pos; + *srcSizePtr = inBuff.pos; + return result; +} + + + +/* ====== Finalize ====== */ + +size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + result = ZSTD_flushStream(zbc, &outBuff); + *dstCapacityPtr = outBuff.pos; + return result; +} + + +size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + result = ZSTD_endStream(zbc, &outBuff); + *dstCapacityPtr = outBuff.pos; + return result; +} + + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedCInSize(void) { return ZSTD_CStreamInSize(); } +size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_CStreamOutSize(); } diff --git a/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_decompress.c b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_decompress.c new file mode 100644 index 0000000..923c22b --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/deprecated/zbuff_decompress.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* ************************************* +* Dependencies +***************************************/ +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" + + +ZBUFF_DCtx* ZBUFF_createDCtx(void) +{ + return ZSTD_createDStream(); +} + +ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDStream_advanced(customMem); +} + +size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd) +{ + return ZSTD_freeDStream(zbd); +} + + +/* *** Initialization *** */ + +size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize) +{ + return ZSTD_initDStream_usingDict(zbd, dict, dictSize); +} + +size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd) +{ + return ZSTD_initDStream(zbd); +} + + +/* *** Decompression *** */ + +size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + ZSTD_outBuffer outBuff; + ZSTD_inBuffer inBuff; + size_t result; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + inBuff.src = src; + inBuff.pos = 0; + inBuff.size = *srcSizePtr; + result = ZSTD_decompressStream(zbd, &outBuff, &inBuff); + *dstCapacityPtr = outBuff.pos; + *srcSizePtr = inBuff.pos; + return result; +} + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedDInSize(void) { return ZSTD_DStreamInSize(); } +size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_DStreamOutSize(); } diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/cover.c b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/cover.c new file mode 100644 index 0000000..b5a3957 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/cover.c @@ -0,0 +1,1048 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* ***************************************************************************** + * Constructs a dictionary using a heuristic based on the following paper: + * + * Liao, Petri, Moffat, Wirth + * Effective Construction of Relative Lempel-Ziv Dictionaries + * Published in WWW 2016. + * + * Adapted from code originally written by @ot (Giuseppe Ottaviano). + ******************************************************************************/ + +/*-************************************* +* Dependencies +***************************************/ +#include <stdio.h> /* fprintf */ +#include <stdlib.h> /* malloc, free, qsort */ +#include <string.h> /* memset */ +#include <time.h> /* clock */ + +#include "mem.h" /* read */ +#include "pool.h" +#include "threading.h" +#include "zstd_internal.h" /* includes zstd.h */ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +#include "zdict.h" + +/*-************************************* +* Constants +***************************************/ +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB)) + +/*-************************************* +* Console display +***************************************/ +static int g_displayLevel = 2; +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#define LOCALDISPLAYLEVEL(displayLevel, l, ...)
\ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; + +/*-************************************* +* Hash table +*************************************** +* A small specialized hash map for storing activeDmers. +* The map does not resize, so if it becomes full it will loop forever. +* Thus, the map must be large enough to store every value. +* The map implements linear probing and keeps its load less than 0.5. +*/ + +#define MAP_EMPTY_VALUE ((U32)-1) +typedef struct COVER_map_pair_t_s { + U32 key; + U32 value; +} COVER_map_pair_t; + +typedef struct COVER_map_s { + COVER_map_pair_t *data; + U32 sizeLog; + U32 size; + U32 sizeMask; +} COVER_map_t; + +/** + * Clear the map. + */ +static void COVER_map_clear(COVER_map_t *map) { + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); +} + +/** + * Initializes a map of the given size. + * Returns 1 on success and 0 on failure. + * The map must be destroyed with COVER_map_destroy(). + * The map is only guaranteed to be large enough to hold size elements. + */ +static int COVER_map_init(COVER_map_t *map, U32 size) { + map->sizeLog = ZSTD_highbit32(size) + 2; + map->size = (U32)1 << map->sizeLog; + map->sizeMask = map->size - 1; + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); + if (!map->data) { + map->sizeLog = 0; + map->size = 0; + return 0; + } + COVER_map_clear(map); + return 1; +} + +/** + * Internal hash function + */ +static const U32 prime4bytes = 2654435761U; +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { + return (key * prime4bytes) >> (32 - map->sizeLog); +} + +/** + * Helper function that returns the index that a key should be placed into. + */ +static U32 COVER_map_index(COVER_map_t *map, U32 key) { + const U32 hash = COVER_map_hash(map, key); + U32 i; + for (i = hash;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *pos = &map->data[i]; + if (pos->value == MAP_EMPTY_VALUE) { + return i; + } + if (pos->key == key) { + return i; + } + } +} + +/** + * Returns the pointer to the value for key. + * If key is not in the map, it is inserted and the value is set to 0. + * The map must not be full. + */ +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; + if (pos->value == MAP_EMPTY_VALUE) { + pos->key = key; + pos->value = 0; + } + return &pos->value; +} + +/** + * Deletes key from the map if present. 
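+ * Uses backward-shift deletion for the linear-probing table : subsequent entries are moved back into the vacated slot instead of leaving a tombstone, so probe chains stay valid after removals.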
+ */ +static void COVER_map_remove(COVER_map_t *map, U32 key) { + U32 i = COVER_map_index(map, key); + COVER_map_pair_t *del = &map->data[i]; + U32 shift = 1; + if (del->value == MAP_EMPTY_VALUE) { + return; + } + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *const pos = &map->data[i]; + /* If the position is empty we are done */ + if (pos->value == MAP_EMPTY_VALUE) { + del->value = MAP_EMPTY_VALUE; + return; + } + /* If pos can be moved to del do so */ + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { + del->key = pos->key; + del->value = pos->value; + del = pos; + shift = 1; + } else { + ++shift; + } + } +} + +/** + * Destroys a map initialized with COVER_map_init(). + */ +static void COVER_map_destroy(COVER_map_t *map) { + if (map->data) { + free(map->data); + } + map->data = NULL; + map->size = 0; +} + +/*-************************************* +* Context +***************************************/ + +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + U32 *suffix; + size_t suffixSize; + U32 *freqs; + U32 *dmerAt; + unsigned d; +} COVER_ctx_t; + +/* We need a global context for qsort... */ +static COVER_ctx_t *g_ctx = NULL; + +/*-************************************* +* Helper functions +***************************************/ + +/** + * Returns the sum of the sample sizes. + */ +static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + size_t i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + +/** + * Returns -1 if the dmer at lp is less than the dmer at rp. + * Returns 0 if the dmers at lp and rp are equal. + * Returns 1 if the dmer at lp is greater than the dmer at rp. + */ +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); +} +/** + * Faster version for d <= 8. + */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} + +/** + * Same as COVER_cmp() except ties are broken by pointer value + * NOTE: g_ctx must be set to call this function. A global is required because + * qsort doesn't take an opaque pointer. + */ +static int COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_ctx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} +/** + * Faster version for d <= 8. + */ +static int COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_ctx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} + +/** + * Returns the first pointer in [first, last) whose element does not compare + * less than value. If no such element exists it returns last. + */ +static const size_t *COVER_lower_bound(const size_t *first, const size_t *last, + size_t value) { + size_t count = last - first; + while (count != 0) { + size_t step = count / 2; + const size_t *ptr = first; + ptr += step; + if (*ptr < value) { + first = ++ptr; + count -= step + 1; + } else { + count = step; + } + } + return first; +} + +/** + * Generic groupBy function.
+ * Groups an array sorted by cmp into groups with equivalent values. + * Calls grp for each group. + */ +static void +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, + int (*cmp)(COVER_ctx_t *, const void *, const void *), + void (*grp)(COVER_ctx_t *, const void *, const void *)) { + const BYTE *ptr = (const BYTE *)data; + size_t num = 0; + while (num < count) { + const BYTE *grpEnd = ptr + size; + ++num; + while (num < count && cmp(ctx, ptr, grpEnd) == 0) { + grpEnd += size; + ++num; + } + grp(ctx, ptr, grpEnd); + ptr = grpEnd; + } +} + +/*-************************************* +* Cover functions +***************************************/ + +/** + * Called on each group of positions with the same dmer. + * Counts the frequency of each dmer and saves it in the suffix array. + * Fills `ctx->dmerAt`. + */ +static void COVER_group(COVER_ctx_t *ctx, const void *group, + const void *groupEnd) { + /* The group consists of all the positions with the same first d bytes. */ + const U32 *grpPtr = (const U32 *)group; + const U32 *grpEnd = (const U32 *)groupEnd; + /* The dmerId is how we will reference this dmer. + * This allows us to map the whole dmer space to a much smaller space, the + * size of the suffix array. + */ + const U32 dmerId = (U32)(grpPtr - ctx->suffix); + /* Count the number of samples this dmer shows up in */ + U32 freq = 0; + /* Details */ + const size_t *curOffsetPtr = ctx->offsets; + const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; + /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a + * different sample than the last. + */ + size_t curSampleEnd = ctx->offsets[0]; + for (; grpPtr != grpEnd; ++grpPtr) { + /* Save the dmerId for this position so we can get back to it. */ + ctx->dmerAt[*grpPtr] = dmerId; + /* Dictionaries only help for the first reference to the dmer. + * After that zstd can reference the match from the previous reference. + * So only count each dmer once for each sample it is in. + */ + if (*grpPtr < curSampleEnd) { + continue; + } + freq += 1; + /* Binary search to find the end of the sample *grpPtr is in. + * In the common case that grpPtr + 1 == grpEnd we can skip the binary + * search because the loop is over. + */ + if (grpPtr + 1 != grpEnd) { + const size_t *sampleEndPtr = + COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); + curSampleEnd = *sampleEndPtr; + curOffsetPtr = sampleEndPtr + 1; + } + } + /* At this point we are never going to look at this segment of the suffix + * array again. We take advantage of this fact to save memory. + * We store the frequency of the dmer in the first position of the group, + * which is dmerId. + */ + ctx->suffix[dmerId] = freq; +} + +/** + * A segment is a range in the source as well as the score of the segment. + */ +typedef struct { + U32 begin; + U32 end; + U32 score; +} COVER_segment_t; + +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of dmer d. + * Let S_i be the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer d is in the dictionay we set F(d) = 0. 
+ */ +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, U32 begin, + U32 end, + ZDICT_cover_params_t parameters) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 dmersInK = k - d + 1; + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + /* Reset the activeDmers in the segment */ + COVER_map_clear(activeDmers); + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. + */ + while (activeSegment.end < end) { + /* The dmerId for the dmer at the next position */ + U32 newDmer = ctx->dmerAt[activeSegment.end]; + /* The entry in activeDmers for this dmerId */ + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); + /* If the dmer isn't already present in the segment add its score. */ + if (*newDmerOcc == 0) { + /* The paper suggests using the L-0.5 norm, but experiments show that it + * doesn't help. + */ + activeSegment.score += freqs[newDmer]; + } + /* Add the dmer to the segment */ + activeSegment.end += 1; + *newDmerOcc += 1; + + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + U32 delDmer = ctx->dmerAt[activeSegment.begin]; + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); + activeSegment.begin += 1; + *delDmerOcc -= 1; + /* If this is the last occurrence of the dmer, subtract its score */ + if (*delDmerOcc == 0) { + COVER_map_remove(activeDmers, delDmer); + activeSegment.score -= freqs[delDmer]; + } + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + { + /* Trim off the zero frequency head and tail from the segment. */ + U32 newBegin = bestSegment.end; + U32 newEnd = bestSegment.begin; + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + U32 freq = freqs[ctx->dmerAt[pos]]; + if (freq != 0) { + newBegin = MIN(newBegin, pos); + newEnd = pos + 1; + } + } + bestSegment.begin = newBegin; + bestSegment.end = newEnd; + } + { + /* Zero out the frequency of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + freqs[ctx->dmerAt[pos]] = 0; + } + } + return bestSegment; +} + +/** + * Check the validity of the parameters. + * Returns non-zero if the parameters are valid and 0 otherwise. + */ +static int COVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize) { + /* k and d are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + return 1; +} + +/** + * Clean up a context initialized with `COVER_ctx_init()`. + */ +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { + if (!ctx) { + return; + } + if (ctx->suffix) { + free(ctx->suffix); + ctx->suffix = NULL; + } + if (ctx->freqs) { + free(ctx->freqs); + ctx->freqs = NULL; + } + if (ctx->dmerAt) { + free(ctx->dmerAt); + ctx->dmerAt = NULL; + } + if (ctx->offsets) { + free(ctx->offsets); + ctx->offsets = NULL; + } +} + +/** + * Prepare a context for dictionary building.
+ * The context is only dependent on the parameter `d` and can be used multiple + * times. + * Returns 1 on success or zero on error. + * The context must be destroyed with `COVER_ctx_destroy()`. + */ +static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + unsigned d) { + const BYTE *const samples = (const BYTE *)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); + return 0; + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples, + (U32)totalSamplesSize); + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + /* Partial suffix array */ + ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* Maps index to the dmerID */ + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + COVER_ctx_destroy(ctx); + return 0; + } + ctx->freqs = NULL; + ctx->d = d; + + /* Fill offsets from the samplesSizes */ + { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + DISPLAYLEVEL(2, "Constructing partial suffix array\n"); + { + /* suffix is a partial suffix array. + * It only sorts suffixes by their first parameters.d bytes. + * The sort is stable, so each dmer group is sorted by position in input. + */ + U32 i; + for (i = 0; i < ctx->suffixSize; ++i) { + ctx->suffix[i] = i; + } + /* qsort doesn't take an opaque pointer, so pass as a global */ + g_ctx = ctx; + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); + } + DISPLAYLEVEL(2, "Computing frequencies\n"); + /* For each dmer group (group of positions with the same first d bytes): + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is + * (groupBeginPtr - suffix). This allows us to go from position to + * dmerID so we can look up values in freq. + * 2. We calculate how many samples the dmer occurs in and save it in + * freqs[dmerId]. + */ + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); + ctx->freqs = ctx->suffix; + ctx->suffix = NULL; + return 1; +} + +/** + * Given the prepared context, build the dictionary. + */ +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, void *dictBuffer, + size_t dictBufferCapacity, + ZDICT_cover_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data up into epochs of equal size. + * We will select at least one segment from each epoch.
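+ * For instance (illustrative numbers only) : a 64 KB dictBufferCapacity + * with k = 1024 gives epochs = 65536/1024 = 64 ; a suffixSize of 1,000,000 + * dmer positions then gives epochSize = 1000000/64 = 15625.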
+ */ + const U32 epochs = (U32)(dictBufferCapacity / parameters.k); + const U32 epochSize = (U32)(ctx->suffixSize / epochs); + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs, + epochSize); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { + const U32 epochBegin = (U32)(epoch * epochSize); + const U32 epochEnd = epochBegin + epochSize; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = COVER_selectSegment( + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content */ + if (segment.score == 0) { + break; + } + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. + */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + COVER_ctx_t ctx; + COVER_map_t activeDmers; + + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Checks */ + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + return ERROR(GENERIC); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(GENERIC); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Initialize context and activeDmers */ + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d)) { + return ERROR(GENERIC); + } + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + COVER_ctx_destroy(&ctx); + return ERROR(GENERIC); + } + + DISPLAYLEVEL(2, "Building dictionary\n"); + { + const size_t tail = + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, + dictBufferCapacity, parameters); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (U32)dictionarySize); + } + COVER_ctx_destroy(&ctx); + COVER_map_destroy(&activeDmers); + return dictionarySize; + } +} + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. 
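+ *
+ * A typical life cycle, as a sketch inferred from the functions below:
+ *   COVER_best_init(&best);     once, before any jobs are launched
+ *   COVER_best_start(&best);    before launching each job
+ *   COVER_best_finish(&best, csize, params, dict, dictSize);  from each job
+ *   COVER_best_wait(&best);     blocks until liveJobs == 0
+ *   COVER_best_destroy(&best);  waits, then frees the saved dictionary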
+ */
+typedef struct COVER_best_s {
+ ZSTD_pthread_mutex_t mutex;
+ ZSTD_pthread_cond_t cond;
+ size_t liveJobs;
+ void *dict;
+ size_t dictSize;
+ ZDICT_cover_params_t parameters;
+ size_t compressedSize;
+} COVER_best_t;
+
+/**
+ * Initialize the `COVER_best_t`.
+ */
+static void COVER_best_init(COVER_best_t *best) {
+ if (best==NULL) return; /* compatible with init on NULL */
+ (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
+ (void)ZSTD_pthread_cond_init(&best->cond, NULL);
+ best->liveJobs = 0;
+ best->dict = NULL;
+ best->dictSize = 0;
+ best->compressedSize = (size_t)-1;
+ memset(&best->parameters, 0, sizeof(best->parameters));
+}
+
+/**
+ * Wait until liveJobs == 0.
+ */
+static void COVER_best_wait(COVER_best_t *best) {
+ if (!best) {
+ return;
+ }
+ ZSTD_pthread_mutex_lock(&best->mutex);
+ while (best->liveJobs != 0) {
+ ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
+ }
+ ZSTD_pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Call COVER_best_wait() and then destroy the COVER_best_t.
+ */
+static void COVER_best_destroy(COVER_best_t *best) {
+ if (!best) {
+ return;
+ }
+ COVER_best_wait(best);
+ if (best->dict) {
+ free(best->dict);
+ }
+ ZSTD_pthread_mutex_destroy(&best->mutex);
+ ZSTD_pthread_cond_destroy(&best->cond);
+}
+
+/**
+ * Called when a thread is about to be launched.
+ * Increments liveJobs.
+ */
+static void COVER_best_start(COVER_best_t *best) {
+ if (!best) {
+ return;
+ }
+ ZSTD_pthread_mutex_lock(&best->mutex);
+ ++best->liveJobs;
+ ZSTD_pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Called when a thread finishes executing, on both error and success.
+ * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
+ * If this dictionary is the best so far save it and its parameters.
+ */
+static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
+ ZDICT_cover_params_t parameters, void *dict,
+ size_t dictSize) {
+ if (!best) {
+ return;
+ }
+ {
+ size_t liveJobs;
+ ZSTD_pthread_mutex_lock(&best->mutex);
+ --best->liveJobs;
+ liveJobs = best->liveJobs;
+ /* If the new dictionary is better */
+ if (compressedSize < best->compressedSize) {
+ /* Allocate space if necessary */
+ if (!best->dict || best->dictSize < dictSize) {
+ if (best->dict) {
+ free(best->dict);
+ }
+ best->dict = malloc(dictSize);
+ if (!best->dict) {
+ best->compressedSize = ERROR(GENERIC);
+ best->dictSize = 0;
+ /* Unlock and wake any waiters before returning, so a failed
+ * allocation cannot leave the mutex held or a waiter blocked. */
+ ZSTD_pthread_mutex_unlock(&best->mutex);
+ if (liveJobs == 0) {
+ ZSTD_pthread_cond_broadcast(&best->cond);
+ }
+ return;
+ }
+ }
+ /* Save the dictionary, parameters, and size */
+ memcpy(best->dict, dict, dictSize);
+ best->dictSize = dictSize;
+ best->parameters = parameters;
+ best->compressedSize = compressedSize;
+ }
+ ZSTD_pthread_mutex_unlock(&best->mutex);
+ if (liveJobs == 0) {
+ ZSTD_pthread_cond_broadcast(&best->cond);
+ }
+ }
+}
+
+/**
+ * Parameters for COVER_tryParameters().
+ */
+typedef struct COVER_tryParameters_data_s {
+ const COVER_ctx_t *ctx;
+ COVER_best_t *best;
+ size_t dictBufferCapacity;
+ ZDICT_cover_params_t parameters;
+} COVER_tryParameters_data_t;
+
+/**
+ * Tries a set of parameters and updates the COVER_best_t with the results.
+ * This function is thread safe if zstd is compiled with multithreaded support.
+ * It takes its parameters as an *OWNING* opaque pointer to support threading.
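+ * The pointer is expected to come from malloc(); this function frees it
+ * before returning (see the free(data) call in the _cleanup block).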
+ */ +static void COVER_tryParameters(void *opaque) { + /* Save parameters as local variables */ + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque; + const COVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Allocate space for hash table, dict, and freqs */ + COVER_map_t activeDmers; + BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); + U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + goto _cleanup; + } + if (!dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); + /* Build the dictionary */ + { + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, + dictBufferCapacity, parameters); + dictBufferCapacity = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, + parameters.zParams); + if (ZDICT_isError(dictBufferCapacity)) { + DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); + goto _cleanup; + } + } + /* Check total compressed size */ + { + /* Pointers */ + ZSTD_CCtx *cctx; + ZSTD_CDict *cdict; + void *dst; + /* Local variables */ + size_t dstCapacity; + size_t i; + /* Allocate dst with enough space to compress the maximum sized sample */ + { + size_t maxSampleSize = 0; + for (i = 0; i < ctx->nbSamples; ++i) { + maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize); + } + dstCapacity = ZSTD_compressBound(maxSampleSize); + dst = malloc(dstCapacity); + } + /* Create the cctx and cdict */ + cctx = ZSTD_createCCtx(); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); + if (!dst || !cctx || !cdict) { + goto _compressCleanup; + } + /* Compress each sample and sum their sizes (or error) */ + totalCompressedSize = dictBufferCapacity; + for (i = 0; i < ctx->nbSamples; ++i) { + const size_t size = ZSTD_compress_usingCDict( + cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i], + ctx->samplesSizes[i], cdict); + if (ZSTD_isError(size)) { + totalCompressedSize = ERROR(GENERIC); + goto _compressCleanup; + } + totalCompressedSize += size; + } + _compressCleanup: + ZSTD_freeCCtx(cctx); + ZSTD_freeCDict(cdict); + if (dst) { + free(dst); + } + } + +_cleanup: + COVER_best_finish(data->best, totalCompressedSize, parameters, dict, + dictBufferCapacity); + free(data); + COVER_map_destroy(&activeDmers); + if (dict) { + free(dict); + } + if (freqs) { + free(freqs); + } +} + +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t *parameters) { + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 
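+ /* 40 is the default number of k candidates tried when steps == 0 */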
40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + + /* Checks */ + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(GENERIC); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(GENERIC); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + COVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return ERROR(GENERIC); + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( + sizeof(COVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + COVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(GENERIC); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = *parameters; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.steps = kSteps; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &COVER_tryParameters, data); + } else { + COVER_tryParameters(data); + } + /* Print status */ + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (U32)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + COVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + *parameters = best.parameters; + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } +} diff --git 
a/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.c b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.c
new file mode 100644
index 0000000..60cceb0
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.c
@@ -0,0 +1,1913 @@
+/*
+ * divsufsort.c for libdivsufsort-lite
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*- Compiler specifics -*/
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wshorten-64-to-32"
+#endif
+
+#if defined(_MSC_VER)
+# pragma warning(disable : 4244)
+# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
+#endif
+
+
+/*- Dependencies -*/
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "divsufsort.h"
+
+/*- Constants -*/
+#if defined(INLINE)
+# undef INLINE
+#endif
+#if !defined(INLINE)
+# define INLINE __inline
+#endif
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (256)
+#endif
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+#if defined(SS_INSERTIONSORT_THRESHOLD)
+# if SS_INSERTIONSORT_THRESHOLD < 1
+# undef SS_INSERTIONSORT_THRESHOLD
+# define SS_INSERTIONSORT_THRESHOLD (1)
+# endif
+#else
+# define SS_INSERTIONSORT_THRESHOLD (8)
+#endif
+#if defined(SS_BLOCKSIZE)
+# if SS_BLOCKSIZE < 0
+# undef SS_BLOCKSIZE
+# define SS_BLOCKSIZE (0)
+# elif 32768 <= SS_BLOCKSIZE
+# undef SS_BLOCKSIZE
+# define SS_BLOCKSIZE (32767)
+# endif
+#else
+# define SS_BLOCKSIZE (1024)
+#endif
+/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
+#if SS_BLOCKSIZE == 0
+# define SS_MISORT_STACKSIZE (96)
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#define SS_SMERGE_STACKSIZE (32)
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#define TR_STACKSIZE (64)
+
+
+/*- Macros -*/
+#ifndef SWAP
+# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
+#endif /* SWAP */
+#ifndef MIN
+# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif /* MIN */
+#ifndef MAX
+# define MAX(_a, _b) (((_a) > (_b)) ?
(_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? 
+ 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
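+   (moves the substrings that are already exhausted at this depth, i.e. whose
+   remaining length does not exceed depth, to the front, flagging them by
+   bitwise negation, and returns the split point)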
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? 
a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
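+   (merges [first, middle) with [middle, last); the buffer must hold at least
+   middle - first elements, which ss_swapmerge checks before calling)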
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? 
(a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; 
k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. 
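+   (median of three for ranges up to 32 elements, pseudo-median of five up to
+   512, and a median of three medians-of-three beyond that)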
*/ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
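+   A suffix s = *c - depth belongs to the middle partition exactly when
+   ISA[s] == v, the rank shared by the whole group; the slot it is written to
+   becomes its updated rank.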
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, 
first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
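+       (buckets are walked from the lexicographically largest (c0, c1) pair
+       downwards, and the free tail of SA, starting at SA + m, doubles as the
+       merge buffer)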
*/
+#ifdef LIBBSC_OPENMP
+ if (openMP)
+ {
+ buf = SA + m;
+ c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
+#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
+ {
+ bufsize = (n - (2 * m)) / omp_get_num_threads();
+ curbuf = buf + omp_get_thread_num() * bufsize;
+ k = 0;
+ for(;;) {
+ #pragma omp critical(sssort_lock)
+ {
+ if(0 < (l = j)) {
+ d0 = c0, d1 = c1;
+ do {
+ k = BUCKET_BSTAR(d0, d1);
+ if(--d1 <= d0) {
+ d1 = ALPHABET_SIZE - 1;
+ if(--d0 < 0) { break; }
+ }
+ } while(((l - k) <= 1) && (0 < (l = k)));
+ c0 = d0, c1 = d1, j = k;
+ }
+ }
+ if(l == 0) { break; }
+ sssort(T, PAb, SA + k, SA + l,
+ curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
+ }
+ }
+ }
+ else
+ {
+ buf = SA + m, bufsize = n - (2 * m);
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+ i = BUCKET_BSTAR(c0, c1);
+ if(1 < (j - i)) {
+ sssort(T, PAb, SA + i, SA + j,
+ buf, bufsize, 2, n, *(SA + i) == (m - 1));
+ }
+ }
+ }
+ }
+#else
+ buf = SA + m, bufsize = n - (2 * m);
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+ i = BUCKET_BSTAR(c0, c1);
+ if(1 < (j - i)) {
+ sssort(T, PAb, SA + i, SA + j,
+ buf, bufsize, 2, n, *(SA + i) == (m - 1));
+ }
+ }
+ }
+#endif
+
+ /* Compute ranks of type B* substrings. */
+ for(i = m - 1; 0 <= i; --i) {
+ if(0 <= SA[i]) {
+ j = i;
+ do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
+ SA[i + 1] = i - j;
+ if(i <= 0) { break; }
+ }
+ j = i;
+ do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
+ ISAb[SA[i]] = j;
+ }
+
+ /* Construct the inverse suffix array of type B* suffixes using trsort. */
+ trsort(ISAb, SA, m, 1);
+
+ /* Set the sorted order of type B* suffixes. */
+ for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
+ if(0 <= i) {
+ t = i;
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
+ SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
+ }
+ }
+
+ /* Calculate the index of start/end point of each bucket. */
+ BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
+ for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
+ i = BUCKET_A(c0 + 1) - 1;
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
+ t = i - BUCKET_B(c0, c1);
+ BUCKET_B(c0, c1) = i; /* end point */
+
+ /* Move all type B* suffixes to the correct position. */
+ for(i = t, j = BUCKET_BSTAR(c0, c1);
+ j <= k;
+ --i, --k) { SA[i] = SA[k]; }
+ }
+ BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
+ BUCKET_B(c0, c0) = i; /* end point */
+ }
+ }
+
+ return m;
+}
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes. */
+static
+void
+construct_SA(const unsigned char *T, int *SA,
+ int *bucket_A, int *bucket_B,
+ int n, int m) {
+ int *i, *j, *k;
+ int s;
+ int c0, c1, c2;
+
+ if(0 < m) {
+ /* Construct the sorted order of type B suffixes by using
+ the sorted order of type B* suffixes. */
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+ /* Scan the suffix array from right to left.
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. 
*/ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.h b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.h new file mode 100644 index 0000000..5440994 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/divsufsort.h @@ -0,0 +1,67 @@ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. + * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.c b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.c new file mode 100644 index 0000000..7d24e49 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.c @@ -0,0 +1,1108 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/*-**************************************
+* Tuning parameters
+****************************************/
+#define MINRATIO 4   /* minimum nb of apparitions to be selected in dictionary */
+#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
+
+
+/*-**************************************
+* Compiler Options
+****************************************/
+/* Unix Large Files support (>4GB) */
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+# define _LARGEFILE_SOURCE
+#elif ! defined(__LP64__)   /* No point defining Large file for 64 bit */
+# define _LARGEFILE64_SOURCE
+#endif
+
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdlib.h>        /* malloc, free */
+#include <string.h>        /* memset */
+#include <stdio.h>         /* fprintf, fopen, ftello64 */
+#include <time.h>          /* clock */
+
+#include "mem.h"           /* read */
+#include "fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"           /* HUF_buildCTable, HUF_writeCTable */
+#include "zstd_internal.h" /* includes zstd.h */
+#include "xxhash.h"        /* XXH64 */
+#include "divsufsort.h"
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+#include "zdict.h"
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define DICTLISTSIZE_DEFAULT 10000
+
+#define NOISELENGTH 32
+
+static const int g_compressionLevel_default = 3;
+static const U32 g_selectivity_default = 9;
+
+
+/*-*************************************
+* Console display
+***************************************/
+#define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
+#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); }    /* 0 : no display;  1: errors;  2: default;  3: details;  4: debug */
+
+static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
+
+static void ZDICT_printHex(const void* ptr, size_t length)
+{
+    const BYTE* const b = (const BYTE*)ptr;
+    size_t u;
+    for (u=0; u<length; u++) {
+        BYTE c = b[u];
+        if (c<32 || c>126) c = '.';   /* non-printable char */
+        DISPLAY("%c", c);
+    }
+}
+
+
+/*-********************************************************
+* Helper functions
+**********************************************************/
+unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
+
+const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dictBuffer + 4);
+}
+
+
+/*-********************************************************
+* Dictionary training functions
+**********************************************************/
+static unsigned ZDICT_NbCommonBytes (size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctzll((U64)val) >> 3);
+# else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+        } else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r=0;
+            _BitScanForward( &r, (U32)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+# else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+        }
+    } else {  /* Big Endian CPU */
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clzll(val) >> 3);
+# else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+# endif
+        } else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+# else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+# endif
+    }   }
+}
+
+
+/*! ZDICT_count() :
+    Count the nb of common bytes between 2 pointers.
+    Note : this function presumes end of buffer followed by noisy guard band.
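+    Illustration : given pIn = "ABCDEFxy..." and pMatch = "ABCDEFzz..."
+    (both followed by such a guard band), ZDICT_count() returns 6 :
+    it XORs sizeof(size_t) bytes at a time through MEM_readST(), then applies
+    ZDICT_NbCommonBytes() to the first non-zero word to locate the first
+    differing byte.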
+*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio, U32 notificationLevel) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 const pattern16 = MEM_read16(b+pos+4); + U32 u, patternEnd = 6; + while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ; + if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++; + for (u=1; u= MINMATCHLENGTH); + } + + /* look backward */ + { size_t length; + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + } + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx= %i at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos); + DISPLAYLEVEL(4, "\n"); + + for (searchLength = MINMATCHLENGTH ; ; searchLength++) { + BYTE currentChar = 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[suffix[id] + searchLength] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + searchLength]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new ref */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + } + + /* look backward */ + { size_t length = MINMATCHLENGTH; + while ((length >= MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } + } + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + 
+        cumulLength[maxLength-1] = lengthList[maxLength-1];
+        for (i=(int)(maxLength-2); i>=0; i--)
+            cumulLength[i] = cumulLength[i+1] + lengthList[i];
+
+        for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break;
+        maxLength = i;
+
+        /* reduce maxLength in case of final into repetitive data */
+        {   U32 l = (U32)maxLength;
+            BYTE const c = b[pos + maxLength-1];
+            while (b[pos+l-2]==c) l--;
+            maxLength = l;
+        }
+        if (maxLength < MINMATCHLENGTH) return solution;   /* skip : no long-enough solution */
+
+        /* calculate savings */
+        savings[5] = 0;
+        for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
+            savings[i] = savings[i-1] + (lengthList[i] * (i-3));
+
+        DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n",
+                     (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
+
+        solution.pos = (U32)pos;
+        solution.length = (U32)maxLength;
+        solution.savings = savings[maxLength];
+
+        /* mark positions done */
+        {   U32 id;
+            for (id=start; id<end; id++) {
+                U32 p, pEnd, length;
+                U32 const testedPos = suffix[id];
+                if (testedPos == pos)
+                    length = solution.length;
+                else {
+                    length = (U32)ZDICT_count(b+pos, b+testedPos);
+                    if (length > solution.length) length = solution.length;
+                }
+                pEnd = (U32)(testedPos + length);
+                for (p=testedPos; p<pEnd; p++)
+                    doneMarks[p] = 1;
+    }   }   }
+
+    return solution;
+}
+
+
+static int isIncluded(const void* in, const void* container, size_t length)
+{
+    const char* const ip = (const char*) in;
+    const char* const into = (const char*) container;
+    size_t u;
+
+    for (u=0; u<length; u++) {  /* works because end of buffer is a noisy guard band */
+        if (ip[u] != into[u]) break;
+    }
+
+    return u==length;
+}
+
+/*! ZDICT_tryMerge() :
+    check if dictItem can be merged, do it if possible
+    @return : id of destination elt, 0 if not merged
+*/
+static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
+{
+    const U32 tableSize = table->pos;
+    const U32 eltEnd = elt.pos + elt.length;
+    const char* const buf = (const char*) buffer;
+
+    /* tail overlap */
+    U32 u; for (u=1; u<tableSize; u++) {
+        if (u==eltNbToSkip) continue;
+        if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {  /* overlap, existing > new */
+            /* append */
+            U32 const addedLength = table[u].pos - elt.pos;
+            table[u].length += addedLength;
+            table[u].pos = elt.pos;
+            table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            elt = table[u];
+            /* sort : improve rank */
+            while ((u>1) && (table[u-1].savings < elt.savings))
+                table[u] = table[u-1], u--;
+            table[u] = elt;
+            return u;
+    }   }
+
+    /* front overlap */
+    for (u=1; u<tableSize; u++) {
+        if (u==eltNbToSkip) continue;
+        if (((table[u].pos + table[u].length) >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
+            /* append */
+            int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            if (addedLength > 0) {   /* otherwise, elt fully included into existing */
+                table[u].length += addedLength;
+                table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
+            }
+            /* sort : improve rank */
+            elt = table[u];
+            while ((u>1) && (table[u-1].savings < elt.savings))
+                table[u] = table[u-1], u--;
+            table[u] = elt;
+            return u;
+        }
+
+        if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
+            if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
+                size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
+                table[u].pos = elt.pos;
+                table[u].savings += (U32)(elt.savings * addedLength / elt.length);
+                table[u].length = MIN(elt.length, table[u].length + 1);
+                return u;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+static void ZDICT_removeDictItem(dictItem* table, U32 id)
+{
+    /* convention : table[0].pos stores nb of elts */
+    U32 const max = table[0].pos;
+    U32 u;
+    if (!id) return;   /* protection, should never happen */
+    for (u=id; u<max-1; u++)
+        table[u] = table[u+1];
+    table->pos--;
+}
+
+
+static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
+{
+    /* merge if possible */
+    U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
+    if (mergeId) {
+        U32 newMerge = 1;
+        while (newMerge) {
+            newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
+            if (newMerge) ZDICT_removeDictItem(table, mergeId);
+            mergeId = newMerge;
+        }
+        return;
+    }
+
+    /* insert */
+    {   U32 current;
+        U32 nextElt = table->pos;
+        if (nextElt >= maxSize) nextElt = maxSize-1;
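+        /* note : dictList[] is kept sorted by decreasing savings ; the new
+           element is slid upward until the ordering holds again.  Element 0
+           acts as a sentinel (ZDICT_initDictItem() sets its savings to
+           (U32)(-1)), so the scan below cannot run past index 0. */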
+        current = nextElt-1;
+        while (table[current].savings < elt.savings) {
+            table[current+1] = table[current];
+            current--;
+        }
+        table[current+1] = elt;
+        table->pos = nextElt+1;
+    }
+}
+
+
+static U32 ZDICT_dictSize(const dictItem* dictList)
+{
+    U32 u, dictSize = 0;
+    for (u=1; u<dictList[0].pos; u++)
+        dictSize += dictList[u].length;
+    return dictSize;
+}
+
+
+static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
+                            const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
+                            const size_t* fileSizes, unsigned nbFiles,
+                            unsigned minRatio, U32 notificationLevel)
+{
+    int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
+    int* const suffix = suffix0+1;
+    U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
+    BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks));   /* +16 for overflow security */
+    U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
+    size_t result = 0;
+    clock_t displayClock = 0;
+    clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
+
+#   define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
+            if (ZDICT_clockSpan(displayClock) > refreshRate) \
+            { displayClock = clock(); DISPLAY(__VA_ARGS__); \
+            if (notificationLevel>=4) fflush(stderr); } }
+
+    /* init */
+    DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
+    if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
+        result = ERROR(memory_allocation);
+        goto _cleanup;
+    }
+    if (minRatio < MINRATIO) minRatio = MINRATIO;
+    memset(doneMarks, 0, bufferSize+16);
+
+    /* limit sample set size (divsufsort limitation)*/
+    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20));
+    while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
+
+    /* sort */
+    DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
+    {   int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
+        if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
+    }
+    suffix[bufferSize] = (int)bufferSize;   /* leads into noise */
+    suffix0[0] = (int)bufferSize;           /* leads into noise */
+    /* build reverse suffix sort */
+    {   size_t pos;
+        for (pos=0; pos < bufferSize; pos++)
+            reverseSuffix[suffix[pos]] = (U32)pos;
+        /* note filePos tracks borders between samples.
+           It's not used at this stage, but planned to become useful in a later update */
+        filePos[0] = 0;
+        for (pos=1; pos<nbFiles; pos++)
+            filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
+    }
+
+    DISPLAYLEVEL(2, "finding patterns ... \n");
+    DISPLAYLEVEL(3, "minimum ratio : %u \n", minRatio);
+
+    {   U32 cursor; for (cursor=0; cursor < bufferSize; ) {
+            dictItem solution;
+            if (doneMarks[cursor]) { cursor++; continue; }
+            solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
+            if (solution.length==0) { cursor++; continue; }
+            ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
+            cursor += solution.length;
+            DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
+    }   }
+
+_cleanup:
+    free(suffix0);
+    free(reverseSuffix);
+    free(doneMarks);
+    free(filePos);
+    return result;
+}
+
+
+static void ZDICT_fillNoise(void* buffer, size_t length)
+{
+    unsigned const prime1 = 2654435761U;
+    unsigned const prime2 = 2246822519U;
+    unsigned acc = prime1;
+    size_t p=0;
+    for (p=0; p<length; p++) {
+        acc *= prime2;
+        ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
+    }
+}
+
+
+typedef struct
+{
+  ZSTD_CCtx* ref;    /* contains reference to dictionary */
+  ZSTD_CCtx* zc;     /* working context */
+  void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
+} EStats_ress_t;
+
+#define MAXREPOFFSET 1024
+
+static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
+                              U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
+                              const void* src, size_t srcSize,
+                              U32 notificationLevel)
+{
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
+    size_t cSize;
+
+    if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
+    {   size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
+        if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
+    }
+    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
+
+    if (cSize) {  /* if == 0; block is not compressible */
+        const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
+
+        /* literals stats */
+        {   const BYTE* bytePtr;
+            for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
+                countLit[*bytePtr]++;
+        }
+
+        /* seqStats */
+        {   U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+            ZSTD_seqToCodes(seqStorePtr);
+
+            {   const BYTE* codePtr = seqStorePtr->ofCode;
+                U32 u;
+                for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
+            }
+
+            {   const BYTE* codePtr = seqStorePtr->mlCode;
+                U32 u;
+                for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
+            }
+
+            {   const BYTE* codePtr = seqStorePtr->llCode;
+                U32 u;
+                for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
+            }
+
+            if (nbSeq >= 2) { /* rep offsets */
+                const seqDef* const seq = seqStorePtr->sequencesStart;
+                U32 offset1 = seq[0].offset - 3;
+                U32 offset2 = seq[1].offset - 3;
+                if (offset1 >= MAXREPOFFSET) offset1 = 0;
+                if (offset2 >= MAXREPOFFSET) offset2 = 0;
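+                /* note : the offsets of the first two sequences are tallied
+                   with weights 3 and 1, favouring the first one ; values
+                   clamped to bin 0 above are effectively discarded.  The
+                   totals appear to feed the bestRepOffset[] ranking (via
+                   ZDICT_insertSortCount()) used by ZDICT_analyzeEntropy(). */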
+                repOffsets[offset1] += 3;
+                repOffsets[offset2] += 1;
+    }   }   }
+}
+
+static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
+{
+    size_t total=0;
+    unsigned u;
+    for (u=0; u<nbFiles; u++) total += fileSizes[u];
+    return total;
+}
+
+typedef struct { U32 offset; U32 count; } offsetCount_t;
+
+static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val, U32 count)
+{
+    U32 u;
+    table[ZSTD_REP_NUM].offset = val;
+    table[ZSTD_REP_NUM].count = count;
+    for (u=ZSTD_REP_NUM; u>0; u--) {
+        offsetCount_t tmp;
+        if (table[u-1].count >= table[u].count) break;
+        tmp = table[u-1];
+        table[u-1] = table[u];
+        table[u] = tmp;
+    }
+}
+
+/* ZDICT_flatLit() :
+ * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
+ * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
+ */
+static void ZDICT_flatLit(U32* countLit)
+{
+    int u;
+    for (u=1; u<256; u++) countLit[u] = 2;
+    countLit[0]   = 4;
+    countLit[253] = 1;
+    countLit[254] = 1;
+}
+
+#define OFFCODE_MAX 30  /* only applicable to first block */
+static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
+                                   unsigned compressionLevel,
+                                   const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
+                                   const void* dictBuffer, size_t dictBufferSize,
+                                   unsigned notificationLevel)
+{
+    U32 countLit[256];
+    HUF_CREATE_STATIC_CTABLE(hufTable, 255);
+    U32 offcodeCount[OFFCODE_MAX+1];
+    short offcodeNCount[OFFCODE_MAX+1];
+    U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
+    U32 matchLengthCount[MaxML+1];
+    short matchLengthNCount[MaxML+1];
+    U32 litLengthCount[MaxLL+1];
+    short litLengthNCount[MaxLL+1];
+    U32 repOffset[MAXREPOFFSET];
+    offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
+    EStats_ress_t esr;
+    ZSTD_parameters params;
+    U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
+    size_t pos = 0, errorCode;
+    size_t eSize = 0;
+    size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
+    size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
+    BYTE* dstPtr = (BYTE*)dstBuffer;
+
+    /* init */
+    DEBUGLOG(4, "ZDICT_analyzeEntropy");
+    esr.ref = ZSTD_createCCtx();
+    esr.zc = ZSTD_createCCtx();
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
+    if (!esr.ref || !esr.zc || !esr.workPlace) {
+        eSize = ERROR(memory_allocation);
+        DISPLAYLEVEL(1, "Not enough memory \n");
+        goto _cleanup;
+    }
+    if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; }   /* too large dictionary */
+    for (u=0; u<256; u++) countLit[u] = 1;   /* any character must be described */
+    for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
+    for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
+    for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
+    memset(repOffset, 0, sizeof(repOffset));
+    repOffset[1] = repOffset[4] = repOffset[8] = 1;
+    memset(bestRepOffset, 0, sizeof(bestRepOffset));
+    if (compressionLevel<=0) compressionLevel = g_compressionLevel_default;
+    params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
+    {   size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
+        if (ZSTD_isError(beginResult)) {
+            DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
+            eSize = ERROR(GENERIC);
+            goto _cleanup;
+    }   }
+
+    /* collect stats on all samples */
+    for (u=0; u dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
+    {   size_t const dictSize = hSize + dictContentSize;
+        char* dictEnd = (char*)dictBuffer + dictSize;
+        memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
+        memcpy(dictBuffer, header, hSize);
+        return dictSize;
+    }
+}
+
+
+size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t
dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + size_t hSize = 8; + + /* calculate entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... \n"); + { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* add dictionary header (after entropy tables) */ + MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); + { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? params.dictID : compliantID; + MEM_writeLE32((char*)dictBuffer+4, dictID); + } + + if (hSize + dictContentSize < dictBufferCapacity) + memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); + return MIN(dictBufferCapacity, hSize+dictContentSize); +} + + +/*! ZDICT_trainFromBuffer_unsafe_legacy() : +* Warning : `samplesBuffer` must be followed by noisy guard band. +* @return : size of dictionary, or an error code which can be tested with ZDICT_isError() +*/ +size_t ZDICT_trainFromBuffer_unsafe_legacy( + void* dictBuffer, size_t maxDictSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); + dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); + unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; + unsigned const minRep = (selectivity > 30) ? 
MINRATIO : nbSamples >> selectivity;
+    size_t const targetDictSize = maxDictSize;
+    size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
+    size_t dictSize = 0;
+    U32 const notificationLevel = params.zParams.notificationLevel;
+
+    /* checks */
+    if (!dictList) return ERROR(memory_allocation);
+    if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); }   /* requested dictionary size is too small */
+    if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* not enough source to create dictionary */
+
+    /* init */
+    ZDICT_initDictItem(dictList);
+
+    /* build dictionary */
+    ZDICT_trainBuffer_legacy(dictList, dictListSize,
+                             samplesBuffer, samplesBuffSize,
+                             samplesSizes, nbSamples,
+                             minRep, notificationLevel);
+
+    /* display best matches */
+    if (params.zParams.notificationLevel>= 3) {
+        U32 const nb = MIN(25, dictList[0].pos);
+        U32 const dictContentSize = ZDICT_dictSize(dictList);
+        U32 u;
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
+        DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
+        for (u=1; u<nb; u++) {
+            U32 const pos = dictList[u].pos;
+            U32 const length = dictList[u].length;
+            U32 const printedLength = MIN(40, length);
+            if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
+                return ERROR(GENERIC);   /* should never happen */
+            DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
+                         u, length, pos, dictList[u].savings);
+            ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
+            DISPLAYLEVEL(3, "| \n");
+    }   }
+
+
+    /* create dictionary */
+    {   U32 dictContentSize = ZDICT_dictSize(dictList);
+        if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* dictionary content too small */
+        if (dictContentSize < targetDictSize/4) {
+            DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+            if (samplesBuffSize < 10 * targetDictSize)
+                DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
+            if (minRep > MINRATIO) {
+                DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
+                DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
+            }
+        }
+
+        if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
+            U32 proposedSelectivity = selectivity-1;
+            while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
+            DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
+            DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
+            DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
+        }
+
+        /* limit dictionary size */
+        {   U32 const max = dictList->pos;   /* convention : nb of useful elts within dictList */
+            U32 currentSize = 0;
+            U32 n; for (n=1; n<max; n++) {
+                currentSize += dictList[n].length;
+                if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; }
+            }
+            dictList->pos = n;
+            dictContentSize = currentSize;
+        }
+
+        /* build dict content */
+        {   U32 u;
+            BYTE* ptr = (BYTE*)dictBuffer + maxDictSize;
+            for (u=1; u<dictList->pos; u++) {
+                U32 l = dictList[u].length;
+                ptr -= l;
+                if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); }   /* should not happen */
+                memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
+        }   }
+
+        dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
+                                                             samplesBuffer, samplesSizes, nbSamples,
+                                                             params.zParams);
+    }
+
+    /* clean up */
+    free(dictList);
+    return dictSize;
+}
+
+
+/* ZDICT_trainFromBuffer_legacy() :
+ * issue : samplesBuffer need to be followed by a noisy guard band.
+ * work around : duplicate the buffer, and add the noise */
+size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
+                                    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                                    ZDICT_legacy_params_t params)
+{
+    size_t result;
+    void* newBuff;
+    size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
+    if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0;   /* not enough content => no dictionary */
+
+    newBuff = malloc(sBuffSize + NOISELENGTH);
+    if (!newBuff) return ERROR(memory_allocation);
+
+    memcpy(newBuff, samplesBuffer, sBuffSize);
+    ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
+
+    result =
+        ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
+                                            samplesSizes, nbSamples, params);
+    free(newBuff);
+    return result;
+}
+
+
+size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
+{
+    ZDICT_cover_params_t params;
+    DEBUGLOG(3, "ZDICT_trainFromBuffer");
+    memset(&params, 0, sizeof(params));
+    params.d = 8;
+    params.steps = 4;
+    /* Default to level 6 since no compression level information is available */
+    params.zParams.compressionLevel = 6;
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+    params.zParams.notificationLevel = ZSTD_DEBUG;
+#endif
+    return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
+                                               samplesBuffer, samplesSizes, nbSamples,
+                                               &params);
+}
+
+size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
+                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
+{
+    ZDICT_params_t params;
+    memset(&params, 0, sizeof(params));
+    return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity,
+                                                     samplesBuffer, samplesSizes, nbSamples,
+                                                     params);
+}
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.h b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.h
new file mode 100644
index 0000000..ad459c2
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/dictBuilder/zdict.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef DICTBUILDER_H_001 +#define DICTBUILDER_H_001 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*====== Dependencies ======*/ +#include /* size_t */ + + +/* ===== ZDICTLIB_API : control library symbols visibility ===== */ +#ifndef ZDICTLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZDICTLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZDICTLIB_API ZDICTLIB_VISIBILITY +#endif + + +/*! ZDICT_trainFromBuffer(): + * Train a dictionary from an array of samples. + * Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +/*====== Helper functions ======*/ +ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); +ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); + + + +#ifdef ZDICT_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +typedef struct { + int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */ +} ZDICT_params_t; + +/*! ZDICT_cover_params_t: + * k and d are the only required parameters. + * For others, value 0 means default. 
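+ *  Editor's note : ZDICT_trainFromBuffer() (declared above) is documented to be
+ *  a thin wrapper which fills d=8, steps=4, leaves k=0, and calls
+ *  ZDICT_optimizeTrainFromBuffer_cover() so that k is searched automatically.
+ *  A minimal sketch along the same lines (`dict`, `cap`, `samples`, `sizes`
+ *  and `nb` are placeholder names, not upstream identifiers) :
+ *      ZDICT_cover_params_t p;
+ *      memset(&p, 0, sizeof(p));
+ *      p.d = 8; p.steps = 4;
+ *      size_t const n = ZDICT_optimizeTrainFromBuffer_cover(dict, cap, samples, sizes, nb, &p);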
+ */ +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + ZDICT_params_t zParams; +} ZDICT_cover_params_t; + + +/*! ZDICT_trainFromBuffer_cover(): + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_cover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations and picks the best parameters. + * `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * + * All of the parameters d, k, steps are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048]. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters); + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. + * + * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. + * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. 
+ * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. + * Note 2: dictBuffer and dictContent can overlap + */ +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + ZDICT_params_t zParams; +} ZDICT_legacy_params_t; + +/*! ZDICT_trainFromBuffer_legacy(): + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * `parameters` is optional and can be provided with values set to 0 to mean "default". + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. 
+ Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API +# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +#endif /* ZDICT_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_H_001 */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dll/example/Makefile b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/Makefile new file mode 100644 index 0000000..45d0db3 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/Makefile @@ -0,0 +1,47 @@ +# ################################################################ +# Copyright (c) 2016-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +VOID := /dev/null +ZSTDDIR := ../include +LIBDIR := ../static +DLLDIR := ../dll + +CFLAGS ?= -O3 # can select custom flags. 
For example : CFLAGS="-O2 -g" make
+CFLAGS  += -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \
+           -Wdeclaration-after-statement -Wstrict-prototypes \
+           -Wpointer-arith -Wstrict-aliasing=1
+CFLAGS  += $(MOREFLAGS)
+CPPFLAGS:= -I$(ZSTDDIR) -DXXH_NAMESPACE=ZSTD_
+FLAGS   := $(CFLAGS) $(CPPFLAGS) $(LDFLAGS)
+
+
+# Define *.exe as extension for Windows systems
+ifneq (,$(filter Windows%,$(OS)))
+EXT =.exe
+else
+EXT =
+endif
+
+.PHONY: default fullbench-dll fullbench-lib
+
+
+default: all
+
+all: fullbench-dll fullbench-lib
+
+
+fullbench-lib: fullbench.c datagen.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LIBDIR)/libzstd_static.lib
+
+fullbench-dll: fullbench.c datagen.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(DLLDIR)/libzstd.dll
+
+clean:
+	@$(RM) fullbench-dll$(EXT) fullbench-lib$(EXT) \
+	@echo Cleaning completed
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dll/example/README.md b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/README.md
new file mode 100644
index 0000000..e231f59
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/README.md
@@ -0,0 +1,69 @@
+ZSTD Windows binary package
+====================================
+
+#### The package contents
+
+- `zstd.exe` : Command Line Utility, supporting gzip-like arguments
+- `dll\libzstd.dll` : The ZSTD dynamic library (DLL)
+- `dll\libzstd.lib` : The import library of the ZSTD dynamic library (DLL) for Visual C++
+- `example\` : Example usage of the ZSTD library
+- `include\` : Header files required by the ZSTD library
+- `static\libzstd_static.lib` : The static ZSTD library (LIB)
+
+
+#### Usage of the Command Line Interface
+
+The Command Line Interface (CLI) supports gzip-like arguments.
+By default the CLI takes an input file and compresses it to an output file:
+```
+    Usage: zstd [arg] [input] [output]
+```
+The full list of commands for the CLI can be obtained with `-h` or `-H`. The compression
+ratio can be improved with levels from `-3` to `-16`, but higher levels also mean slower
+compression. The CLI includes an in-memory compression benchmark module; the starting
+compression level is given with `-b`, the ending level with `-e`, and the iteration time
+with `-i` seconds. The CLI supports aggregation of parameters, i.e. `-b1`, `-e18`, and
+`-i1` can be joined into `-b1e18i1`.
+
+
+#### Example usage of the static and dynamic ZSTD libraries with gcc/MinGW
+
+Use `cd example` and `make` to build `fullbench-dll` and `fullbench-lib`.
+`fullbench-dll` uses the dynamic ZSTD library from the `dll` directory.
+`fullbench-lib` uses the static ZSTD library from the `static` directory.
+
+
+#### Using the ZSTD DLL with gcc/MinGW
+
+The header files from `include\` and the dynamic library `dll\libzstd.dll`
+are required to compile a project using gcc/MinGW.
+The dynamic library has to be added to the linking options.
+This means that if a project that uses ZSTD consists of a single `test-dll.c`
+file, it should be linked with `dll\libzstd.dll`. For example:
+```
+    gcc $(CFLAGS) -Iinclude\ test-dll.c -o test-dll dll\libzstd.dll
+```
+The compiled executable will require the ZSTD DLL, which is available at `dll\libzstd.dll`.
+
+
+#### Example usage of the static and dynamic ZSTD libraries with Visual C++
+
+Open `example\fullbench-dll.sln` to compile `fullbench-dll`, which uses the
+dynamic ZSTD library from the `dll` directory. The solution works with Visual C++
+2010 or newer. When the solution is opened with a Visual C++ version newer than
+2010, it will be upgraded to the current version.
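+
+
+#### Quick build check
+
+Assuming gcc/MinGW is installed and available on `PATH`, both example binaries
+can be built from the `example\` directory with the Makefile targets shown above:
+```
+    make fullbench-lib
+    make fullbench-dll
+```
+On Windows the resulting executables receive an `.exe` extension, as arranged by
+the `EXT` variable in the Makefile above.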
+
+
+#### Using the ZSTD DLL with Visual C++
+
+The header files from `include\` and the import library `dll\libzstd.lib`
+are required to compile a project using Visual C++.
+
+1. The path to the header files should be added to `Additional Include Directories`, which can
+   be found in the project properties, under `C/C++` then `General`.
+2. The import library has to be added to `Additional Dependencies`, which can
+   be found in the project properties, under `Linker` then `Input`.
+   If only the name `libzstd.lib` is provided, without a full path to the library,
+   the containing directory has to be added to `Linker\General\Additional Library Directories`.
+
+The compiled executable will require the ZSTD DLL, which is available at `dll\libzstd.dll`.
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dll/example/build_package.bat b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/build_package.bat
new file mode 100644
index 0000000..cae0a15
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/build_package.bat
@@ -0,0 +1,19 @@
+@ECHO OFF
+MKDIR bin\dll bin\static bin\example bin\include
+COPY tests\fullbench.c bin\example\
+COPY programs\datagen.c bin\example\
+COPY programs\datagen.h bin\example\
+COPY programs\util.h bin\example\
+COPY programs\platform.h bin\example\
+COPY lib\common\mem.h bin\example\
+COPY lib\common\zstd_errors.h bin\example\
+COPY lib\common\zstd_internal.h bin\example\
+COPY lib\common\error_private.h bin\example\
+COPY lib\common\xxhash.h bin\example\
+COPY lib\zstd.h bin\include\
+COPY lib\libzstd.a bin\static\libzstd_static.lib
+COPY lib\dll\libzstd.* bin\dll\
+COPY lib\dll\example\Makefile bin\example\
+COPY lib\dll\example\fullbench-dll.* bin\example\
+COPY lib\dll\example\README.md bin\
+COPY programs\zstd.exe bin\zstd.exe
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.sln b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.sln
new file mode 100644
index 0000000..72e302e
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.sln
@@ -0,0 +1,25 @@
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Express 2012 for Windows Desktop
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench-dll", "fullbench-dll.vcxproj", "{13992FD2-077E-4954-B065-A428198201A9}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.ActiveCfg = Debug|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.Build.0 = Debug|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.ActiveCfg = Debug|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.Build.0 = Debug|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.ActiveCfg = Release|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.Build.0 = Release|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.ActiveCfg = Release|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.vcxproj b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.vcxproj
new file mode 100644
index 0000000..44bbaf7
--- /dev/null
+++
b/c-blosc/internal-complibs/zstd-1.3.4/dll/example/fullbench-dll.vcxproj @@ -0,0 +1,181 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {00000000-1CC8-4FD7-9281-6B8DBB9D3DF8} + Win32Proj + fullbench-dll + $(SolutionDir)bin\$(Platform)_$(Configuration)\ + $(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\ + + + + Application + true + MultiByte + + + Application + true + MultiByte + + + Application + false + true + MultiByte + + + Application + false + true + MultiByte + + + + + + + + + + + + + + + + + + + true + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath); + false + + + true + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath); + false + + + false + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath); + false + + + false + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath); + false + + + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;ZSTD_DLL_IMPORT=1;%(PreprocessorDefinitions) + true + false + ..\include + + + Console + true + $(SolutionDir)..\dll;%(AdditionalLibraryDirectories) + libzstd.lib;%(AdditionalDependencies) + false + + + + + + + Level4 + Disabled + WIN32;_DEBUG;_CONSOLE;ZSTD_DLL_IMPORT=1;%(PreprocessorDefinitions) + true + false + ..\include + + + Console + true + $(SolutionDir)..\dll;%(AdditionalLibraryDirectories) + libzstd.lib;%(AdditionalDependencies) + + + + + Level4 + + + MaxSpeed + true + true + WIN32;_DEBUG;_CONSOLE;ZSTD_DLL_IMPORT=1;%(PreprocessorDefinitions) + false + ..\include + false + MultiThreaded + + + Console + true + true + true + $(SolutionDir)..\dll;%(AdditionalLibraryDirectories) + libzstd.lib;%(AdditionalDependencies) + false + + + + + Level4 + + + MaxSpeed + true + true + WIN32;_DEBUG;_CONSOLE;ZSTD_DLL_IMPORT=1;%(PreprocessorDefinitions) + false + false + ..\include + MultiThreaded + + + Console + true + true + true + $(SolutionDir)..\dll;%(AdditionalLibraryDirectories) + libzstd.lib;%(AdditionalDependencies) + + + + + + + + + + + + + \ No newline at end of file diff --git a/c-blosc/internal-complibs/zstd-1.3.4/dll/libzstd.def b/c-blosc/internal-complibs/zstd-1.3.4/dll/libzstd.def new file mode 100644 index 0000000..51d0c19 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/dll/libzstd.def @@ -0,0 +1,88 @@ +LIBRARY libzstd.dll +EXPORTS + ZDICT_getDictID + ZDICT_getErrorName + ZDICT_isError + ZDICT_trainFromBuffer + ZSTD_CStreamInSize + ZSTD_CStreamOutSize + ZSTD_DStreamInSize + ZSTD_DStreamOutSize + ZSTD_adjustCParams + ZSTD_checkCParams + ZSTD_compress + ZSTD_compressBegin + ZSTD_compressBegin_advanced + ZSTD_compressBegin_usingDict + ZSTD_compressBlock + ZSTD_compressBound + ZSTD_compressCCtx + ZSTD_compressContinue + ZSTD_compressEnd + ZSTD_compressStream + ZSTD_compress_advanced + ZSTD_compress_usingCDict + ZSTD_compress_usingDict + ZSTD_copyCCtx + ZSTD_copyDCtx + ZSTD_createCCtx + ZSTD_createCCtx_advanced + ZSTD_createCDict + ZSTD_createCDict_advanced + ZSTD_createCStream + ZSTD_createCStream_advanced + ZSTD_createDCtx + ZSTD_createDCtx_advanced + ZSTD_createDDict + ZSTD_createDStream + ZSTD_createDStream_advanced + ZSTD_decompress + ZSTD_decompressBegin 
+ ZSTD_decompressBegin_usingDict + ZSTD_decompressBlock + ZSTD_decompressContinue + ZSTD_decompressDCtx + ZSTD_decompressStream + ZSTD_decompress_usingDDict + ZSTD_decompress_usingDict + ZSTD_endStream + ZSTD_estimateCCtxSize + ZSTD_estimateDCtxSize + ZSTD_flushStream + ZSTD_freeCCtx + ZSTD_freeCDict + ZSTD_freeCStream + ZSTD_freeDCtx + ZSTD_freeDDict + ZSTD_freeDStream + ZSTD_getBlockSizeMax + ZSTD_getCParams + ZSTD_getDecompressedSize + ZSTD_findDecompressedSize + ZSTD_getFrameContentSize + ZSTD_getErrorName + ZSTD_getFrameParams + ZSTD_getParams + ZSTD_initCStream + ZSTD_initCStream_advanced + ZSTD_initCStream_usingCDict + ZSTD_initCStream_usingDict + ZSTD_initDStream + ZSTD_initDStream_usingDDict + ZSTD_initDStream_usingDict + ZSTD_insertBlock + ZSTD_isError + ZSTD_isFrame + ZSTD_maxCLevel + ZSTD_nextInputType + ZSTD_nextSrcSizeToDecompress + ZSTD_resetCStream + ZSTD_resetDStream + ZSTD_setDStreamParameter + ZSTD_sizeof_CCtx + ZSTD_sizeof_CDict + ZSTD_sizeof_CStream + ZSTD_sizeof_DCtx + ZSTD_sizeof_DDict + ZSTD_sizeof_DStream + ZSTD_versionNumber diff --git a/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_legacy.h b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_legacy.h new file mode 100644 index 0000000..5893cb9 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_legacy.h @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include "mem.h" /* MEM_STATIC */ +#include "error_private.h" /* ERROR */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +# include "zstd_v01.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +# include "zstd_v02.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +# include "zstd_v03.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) +# include "zstd_v04.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +# include "zstd_v05.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +# include "zstd_v06.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +# include "zstd_v07.h" +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. 
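+   For example, a frame produced by zstd v0.5.x yields 5, while a
+   current-format frame (or a buffer shorter than 4 bytes) yields 0.
+   Editor's sketch of the usual dispatch (cf. ZSTD_decompressLegacy() below) :
+     unsigned const version = ZSTD_isLegacy(src, srcSize);
+     size_t const r = version ?
+         ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, NULL, 0) :
+         ZSTD_decompress(dst, dstCapacity, src, srcSize);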
+*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return 
ERROR(prefix_unknown); + } +} + +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, + size_t compressedSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + return ZSTDv05_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + return ZSTDv06_findFrameCompressedSize(src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + return ZSTDv07_findFrameCompressedSize(src, compressedSize); +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? 
ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ diff --git a/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.c b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.c new file mode 100644 index 0000000..ae1cb2c --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.c @@ -0,0 +1,2127 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+
+/******************************************
+* Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v01.h"
+#include "error_private.h"
+
+
+/******************************************
+* Static allocation
+******************************************/
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
+
+
+/****************************************************************
+* Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+* Byte symbol type
+****************************************************************/
+typedef struct
+{
+    unsigned short newState;
+    unsigned char symbol;
+    unsigned char nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+
+
+/****************************************************************
+* Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>    /* For Visual 2005 */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)   /* disable: C4214: non-int bitfields */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+* Includes
+****************************************************************/
+#include <stdlib.h>    /* malloc, free, qsort */
+#include <string.h>    /* memcpy, memset */
+#include <stdio.h>     /* printf (debug) */
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/****************************************************************
+* Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+typedef  int64_t S64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+typedef   signed long long  S64;
+#endif
+
+#endif /* MEM_ACCESS_MODULE */
+
+/****************************************************************
+* Memory I/O
+*****************************************************************/
+/* FSE_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define FSE_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define FSE_FORCE_MEMORY_ACCESS 1 +# endif +#endif + + +static unsigned FSE_32bits(void) +{ + return sizeof(void*)==4; +} + +static unsigned FSE_isLittleEndian(void) +{ + const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) + +static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +static U16 FSE_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 FSE_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U64 FSE_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +#endif // FSE_FORCE_MEMORY_ACCESS + +static U16 FSE_readLE16(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +static U32 FSE_readLE32(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + + +static U64 FSE_readLE64(const void* memPtr) +{ + if (FSE_isLittleEndian()) + return FSE_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + 
((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + +static size_t FSE_readLEST(const void* memPtr) +{ + if (FSE_32bits()) + return (size_t)FSE_readLE32(memPtr); + else + return (size_t)FSE_readLE64(memPtr); +} + + + +/**************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + + +/**************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/**************************************************************** +* Complex types +****************************************************************/ +typedef struct +{ + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +/**************************************************************** +* Internal functions +****************************************************************/ +FORCE_INLINE unsigned FSE_highbit32 (U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + +/**************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + + +static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; } + +#define FSE_DECODE_TYPE FSE_decode_t + + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +static size_t FSE_buildDTable +(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */ + const U32 tableSize = 1 << tableLog; + const U32 tableMask = tableSize-1; + const U32 step = FSE_tableStep(tableSize); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U32 position = 0; + U32 highThreshold = tableSize-1; + const S16 largeLimit= (S16)(1 << (tableLog-1)); + U32 noLarge = 1; + 
U32 s; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge; + if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge; + + /* Init, lay down lowprob symbols */ + DTableH[0].tableLog = (U16)tableLog; + for (s=0; s<=maxSymbolValue; s++) + { + if (normalizedCounter[s]==-1) + { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) noLarge=0; + symbolNext[s] = normalizedCounter[s]; + } + } + + /* Spread symbols */ + for (s=0; s<=maxSymbolValue; s++) + { + int i; + for (i=0; i highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } + } + + if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + + /* Build Decoding table */ + { + U32 i; + for (i=0; ifastMode = (U16)noLarge; + return 0; +} + + +/****************************************** +* FSE byte symbol +******************************************/ +#ifndef FSE_COMMONDEFS_ONLY + +static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); } + +static short FSE_abs(short a) +{ + return a<0? -a : a; +} + + +/**************************************************************** +* Header bitstream management +****************************************************************/ +static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong; + bitStream = FSE_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge; + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<1) && (charnum<=*maxSVPtr)) + { + if (previous0) + { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) + { + n0+=24; + if (ip < iend-5) + { + ip+=2; + bitStream = FSE_readLE32(ip) >> bitCount; + } + else + { + bitStream >>= 16; + bitCount+=16; + } + } + while ((bitStream & 3) == 3) + { + n0+=3; + bitStream>>=2; + bitCount+=2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall; + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + bitStream = FSE_readLE32(ip) >> bitCount; + } + else + bitStream >>= 2; + } + { + const short max = (short)((2*threshold-1)-remaining); + short count; + + if ((bitStream & (threshold-1)) < (U32)max) + { + count = (short)(bitStream & (threshold-1)); + bitCount += nbBits-1; + } + else + { + count = (short)(bitStream & (2*threshold-1)); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= FSE_abs(count); + normalizedCounter[charnum++] = count; + previous0 = !count; + while (remaining < threshold) + { + nbBits--; + threshold >>= 1; + } + + { + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) + { + ip += bitCount>>3; + bitCount &= 7; + } + else + { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = 
iend - 4; + } + bitStream = FSE_readLE32(ip) >> (bitCount & 31); + } + } + } + if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC; + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; + return ip-istart; +} + + +/********************************************************* +* Decompression (Byte symbols) +*********************************************************/ +static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s<=maxSymbolValue; s++) + { + dinfo[s].newState = 0; + dinfo[s].symbol = (BYTE)s; + dinfo[s].nbBits = (BYTE)nbBits; + } + + return 0; +} + + +/* FSE_initDStream + * Initialize a FSE_DStream_t. + * srcBuffer must point at the beginning of an FSE block. + * The function result is the size of the FSE_block (== srcSize). + * If srcSize is too small, the function will return an errorCode; + */ +static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; + + if (srcSize >= sizeof(size_t)) + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = FSE_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + + +/*!FSE_lookBits + * Provides next n bits from the bitContainer. 
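+ * Note (added gloss) : the expression below extracts the bits with two right
+ *        shifts, ">> 1" followed by ">> ((bitMask-nbBits) & bitMask)", rather
+ *        than a single shift by (container width - nbBits) : every shift
+ *        count stays strictly below the container width, so the nbBits==0
+ *        case remains well-defined C. The Fast variant further down uses a
+ *        single shift instead, which is why it requires nbBits >= 1.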
+ * bitContainer is not modified (bits are still present for next read/look) + * On 32-bits, maxNbBits==25 + * On 64-bits, maxNbBits==57 + * return : value extracted. + */ +static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + + +/*!FSE_readBits + * Read next n bits from the bitContainer. + * On 32-bits, don't read more than maxNbBits==25 + * On 64-bits, don't read more than maxNbBits==57 + * Use the fast variant *only* if n >= 1. + * return : value extracted. + */ +static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) +{ + size_t value = FSE_lookBits(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ +{ + size_t value = FSE_lookBitsFast(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static unsigned FSE_reloadDStream(FSE_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return FSE_DStream_tooFar; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = FSE_readLEST(bitD->ptr); + return FSE_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; + return FSE_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + U32 result = FSE_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = FSE_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + + +static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); + FSE_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = FSE_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = FSE_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/* FSE_endOfDStream + Tells if bitD has reached end of bitStream or not */ + +static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) +{ + return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == 
sizeof(bitD->bitContainer)*8)); +} + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +FORCE_INLINE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + FSE_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + size_t errorCode; + + /* Init */ + errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ + if (FSE_isError(errorCode)) return errorCode; + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ + FSE_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + FSE_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */ + while (1) + { + if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) + break; + + *op++ = FSE_GETSYMBOL(&state1); + + if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) + break; + + *op++ = FSE_GETSYMBOL(&state2); + } + + /* end ? 
*/ + if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + return op-ostart; + + if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ + + return (size_t)-FSE_ERROR_corruptionDetected; +} + + +static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */ + + /* select fast mode (static) */ + if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + size_t errorCode; + + if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + + /* normal FSE decoding mode */ + errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + ip += errorCode; + cSrcSize -= errorCode; + + errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + + /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); +} + + + +/* ******************************************************* +* Huff0 : Huffman block compression +*********************************************************/ +#define HUF_MAX_SYMBOL_VALUE 255 +#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */ +#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */ +#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) +# error "HUF_MAX_TABLELOG is too large !" +#endif + +typedef struct HUF_CElt_s { + U16 val; + BYTE nbBits; +} HUF_CElt ; + +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + + +/* ******************************************************* +* Huff0 : Huffman block decompression +*********************************************************/ +typedef struct { + BYTE byte; + BYTE nbBits; +} HUF_DElt; + +static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + U32 weightTotal; + U32 maxBits; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + U32 n; + U32 nextRankStart; + void* ptr = DTable+1; + HUF_DElt* const dt = (HUF_DElt*)ptr; + + if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + iSize = ip[0]; + + FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ + //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... 
*/ + if (iSize >= 128) /* special header */ + { + if (iSize >= (242)) /* RLE */ + { + static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; + oSize = l[iSize-242]; + memset(huffWeight, 1, sizeof(huffWeight)); + iSize = 0; + } + else /* Incompressible */ + { + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + ip += 1; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } + } + } + else /* header compressed with FSE (normal case) */ + { + if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankVal, 0, sizeof(rankVal)); + weightTotal = 0; + for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected; + rankVal[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } + if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected; + + /* get last non-null symbol weight (implied, total must be 2^n) */ + maxBits = FSE_highbit32(weightTotal) + 1; + if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */ + DTable[0] = (U16)maxBits; + { + U32 total = 1 << maxBits; + U32 rest = total - weightTotal; + U32 verif = 1 << FSE_highbit32(rest); + U32 lastWeight = FSE_highbit32(rest) + 1; + if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankVal[lastWeight]++; + } + + /* check tree construction validity */ + if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */ + + /* Prepare ranks */ + nextRankStart = 0; + for (n=1; n<=maxBits; n++) + { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } + + /* fill DTable */ + for (n=0; n<=oSize; n++) + { + const U32 w = huffWeight[n]; + const U32 length = (1 << w) >> 1; + U32 i; + HUF_DElt D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } + + return iSize+1; +} + + +static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog) +{ + const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + const BYTE c = dt[val].byte; + FSE_skipBits(Dstream, dt[val].nbBits); + return c; +} + +static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const U16* DTable) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-15; + + const void* ptr = DTable; + const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; + const U32 dtLog = DTable[0]; + size_t errorCode; + U32 reloadStatus; + + /* Init */ + + const U16* jumpTable = (const U16*)cSrc; + const size_t length1 = FSE_readLE16(jumpTable); + const size_t length2 = FSE_readLE16(jumpTable+1); + const size_t length3 = FSE_readLE16(jumpTable+2); + const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! 
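+    /* Implied layout of the compressed block (sketch derived from the
+     * arithmetic above) :
+     * | 2B | 2B | 2B | stream1 | stream2 | stream3 | stream4 |
+     * The three little-endian 16-bit fields store the sizes of streams 1-3;
+     * stream4's size is whatever remains of the block. */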
+ const char* const start1 = (const char*)(cSrc) + 6; + const char* const start2 = start1 + length1; + const char* const start3 = start2 + length2; + const char* const start4 = start3 + length3; + FSE_DStream_t bitD1, bitD2, bitD3, bitD4; + + if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + + errorCode = FSE_initDStream(&bitD1, start1, length1); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD2, start2, length2); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD3, start3, length3); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD4, start4, length4); + if (FSE_isError(errorCode)) return errorCode; + + reloadStatus=FSE_reloadDStream(&bitD2); + + /* 16 symbols per loop */ + for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) + +#define HUF_DECODE_SYMBOL_2(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ + if (FSE_32bits()) FSE_reloadDStream(&Dstream) + + HUF_DECODE_SYMBOL_1( 0, bitD1); + HUF_DECODE_SYMBOL_1( 1, bitD2); + HUF_DECODE_SYMBOL_1( 2, bitD3); + HUF_DECODE_SYMBOL_1( 3, bitD4); + HUF_DECODE_SYMBOL_2( 4, bitD1); + HUF_DECODE_SYMBOL_2( 5, bitD2); + HUF_DECODE_SYMBOL_2( 6, bitD3); + HUF_DECODE_SYMBOL_2( 7, bitD4); + HUF_DECODE_SYMBOL_1( 8, bitD1); + HUF_DECODE_SYMBOL_1( 9, bitD2); + HUF_DECODE_SYMBOL_1(10, bitD3); + HUF_DECODE_SYMBOL_1(11, bitD4); + HUF_DECODE_SYMBOL_0(12, bitD1); + HUF_DECODE_SYMBOL_0(13, bitD2); + HUF_DECODE_SYMBOL_0(14, bitD3); + HUF_DECODE_SYMBOL_0(15, bitD4); + } + + if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ + return (size_t)-FSE_ERROR_corruptionDetected; + + /* tail */ + { + // bitTail = bitD1; // *much* slower : -20% !??! + FSE_DStream_t bitTail; + bitTail.ptr = bitD1.ptr; + bitTail.bitsConsumed = bitD1.bitsConsumed; + bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer + bitTail.start = start1; + for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + ip += errorCode; + cSrcSize -= errorCode; + + return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ + +/* + zstd - standard compression library + Copyright (C) 2014-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - zstd source repository : https://github.com/Cyan4973/zstd
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+* Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect */
+#define ZSTD_MEMORY_USAGE 17
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated thanks to feedbacks
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define ZSTD_UNALIGNED_ACCESS 1
+#else
+#  define ZSTD_UNALIGNED_ACCESS 0
+#endif
+
+
+/********************************************************
+* Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/********************************************************
+* Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>    /* For Visual 2005 */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)   /* disable: C4324: padded structure */
+#endif
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/********************************************************
+* Basic Types
+*********************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+#endif /* MEM_ACCESS_MODULE */
+
+
+/********************************************************
+* Constants
+*********************************************************/
+static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
+
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6 64
+#define BIT5 32
+#define BIT4 16
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30) + +#define BLOCKSIZE (128 KB) /* define, for static allocation */ + +#define WORKPLACESIZE (BLOCKSIZE*3) +#define MINMATCH 4 +#define MLbits 7 +#define LLbits 6 +#define Offbits 5 +#define MaxML ((1<>3]; +#else + U32 hashTable[HASH_TABLESIZE]; +#endif + BYTE buffer[WORKPLACESIZE]; +} cctxi_t; + + + + +/************************************** +* Error Management +**************************************/ +/* published entry point */ +unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); } + + +/************************************** +* Tool functions +**************************************/ +#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ +#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */ +#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/************************************************************** +* Decompression code +**************************************************************/ + +size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +{ + const BYTE* const in = (const BYTE* const)src; + BYTE headerFlags; + U32 cSize; + + if (srcSize < 3) return ERROR(srcSize_wrong); + + headerFlags = *in; + cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); + + bpPtr->blockType = (blockType_t)(headerFlags >> 6); + bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; + + if (bpPtr->blockType == bt_end) return 0; + if (bpPtr->blockType == bt_rle) return 1; + return cSize; +} + + +static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +static size_t ZSTD_decompressLiterals(void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + maxDstSize; + const BYTE* ip = (const BYTE*)src; + size_t errorCode; + size_t litSize; + + /* check : minimum 2, for litSize, +1, for content */ + if (srcSize <= 3) return ERROR(corruption_detected); + + litSize = ip[1] + (ip[0]<<8); + litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh.... 
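+    /* Note (added gloss) : litSize is a 19-bit value. The low 16 bits come
+     * from the two bytes just read; the top 3 bits live in bits 3-5 of the
+     * first byte of the enclosing block header, 3 bytes before `src`, hence
+     * the backward index flagged by the comment above. */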
+ op = oend - litSize; + + (void)ctx; + if (litSize > maxDstSize) return ERROR(dstSize_tooSmall); + errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); + if (FSE_isError(errorCode)) return ERROR(GENERIC); + return litSize; +} + + +size_t ZSTDv01_decodeLiteralsBlock(void* ctx, + void* dst, size_t maxDstSize, + const BYTE** litStart, size_t* litSize, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + blockProperties_t litbp; + + size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp); + if (ZSTDv01_isError(litcSize)) return litcSize; + if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + ip += ZSTD_blockHeaderSize; + + switch(litbp.blockType) + { + case bt_raw: + *litStart = ip; + ip += litcSize; + *litSize = litcSize; + break; + case bt_rle: + { + size_t rleSize = litbp.origSize; + if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall); + if (!srcSize) return ERROR(srcSize_wrong); + memset(oend - rleSize, *ip, rleSize); + *litStart = oend - rleSize; + *litSize = rleSize; + ip++; + break; + } + case bt_compressed: + { + size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize); + if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize; + *litStart = oend - decodedLitSize; + *litSize = decodedLitSize; + ip += litcSize; + break; + } + case bt_end: + default: + return ERROR(GENERIC); + } + + return ip-istart; +} + + +size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + const BYTE* const iend = istart + srcSize; + U32 LLtype, Offtype, MLtype; + U32 LLlog, Offlog, MLlog; + size_t dumpsLength; + + /* check */ + if (srcSize < 5) return ERROR(srcSize_wrong); + + /* SeqHead */ + *nbSeq = ZSTD_readLE16(ip); ip+=2; + LLtype = *ip >> 6; + Offtype = (*ip >> 4) & 3; + MLtype = (*ip >> 2) & 3; + if (*ip & 2) + { + dumpsLength = ip[2]; + dumpsLength += ip[1] << 8; + ip += 3; + } + else + { + dumpsLength = ip[1]; + dumpsLength += (ip[0] & 1) << 8; + ip += 2; + } + *dumpsPtr = ip; + ip += dumpsLength; + *dumpsLengthPtr = dumpsLength; + + /* check */ + if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + + /* sequences */ + { + S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ + size_t headerSize; + + /* Build DTables */ + switch(LLtype) + { + case bt_rle : + LLlog = 0; + FSE_buildDTable_rle(DTableLL, *ip++); break; + case bt_raw : + LLlog = LLbits; + FSE_buildDTable_raw(DTableLL, LLbits); break; + default : + { U32 max = MaxLL; + headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (LLlog > LLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableLL, norm, max, LLlog); + } } + + switch(Offtype) + { + case bt_rle : + Offlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableOffb, *ip++); break; + case bt_raw : + Offlog = Offbits; + FSE_buildDTable_raw(DTableOffb, Offbits); break; + default : + { U32 max = MaxOff; + headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); + if 
(FSE_isError(headerSize)) return ERROR(GENERIC); + if (Offlog > OffFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableOffb, norm, max, Offlog); + } } + + switch(MLtype) + { + case bt_rle : + MLlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableML, *ip++); break; + case bt_raw : + MLlog = MLbits; + FSE_buildDTable_raw(DTableML, MLbits); break; + default : + { U32 max = MaxML; + headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (MLlog > MLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableML, norm, max, MLlog); + } } } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t offset; + size_t matchLength; +} seq_t; + +typedef struct { + FSE_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + size_t prevOffset; + const BYTE* dumps; + const BYTE* dumpsEnd; +} seqState_t; + + +static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) +{ + size_t litLength; + size_t prevOffset; + size_t offset; + size_t matchLength; + const BYTE* dumps = seqState->dumps; + const BYTE* const de = seqState->dumpsEnd; + + /* Literal length */ + litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); + prevOffset = litLength ? seq->offset : seqState->prevOffset; + seqState->prevOffset = seq->offset; + if (litLength == MaxLL) + { + U32 add = dumps 1 byte */ + dumps += 3; + } + } + } + + /* Offset */ + { + U32 offsetCode, nbBits; + offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); + if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); + nbBits = offsetCode - 1; + if (offsetCode==0) nbBits = 0; /* cmove */ + offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits); + if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream)); + if (offsetCode==0) offset = prevOffset; + } + + /* MatchLength */ + matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); + if (matchLength == MaxML) + { + U32 add = dumps 1 byte */ + dumps += 3; + } + } + } + matchLength += MINMATCH; + + /* save result */ + seq->litLength = litLength; + seq->offset = offset; + seq->matchLength = matchLength; + seqState->dumps = dumps; +} + + +static size_t ZSTD_execSequence(BYTE* op, + seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + BYTE* const base, BYTE* const oend) +{ + static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ + static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */ + const BYTE* const ostart = op; + const size_t litLength = sequence.litLength; + BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */ + const BYTE* const litEnd = *litPtr + litLength; + + /* check */ + if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ + if (litEnd > litLimit) return ERROR(corruption_detected); + if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */ + + /* copy Literals */ + if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8)) + memmove(op, *litPtr, litLength); /* overwrite risk */ + else + ZSTD_wildcopy(op, *litPtr, litLength); + op += litLength; + *litPtr = litEnd; /* update for next sequence */ + + /* 
check : last match must be at a minimum distance of 8 from end of dest buffer */ + if (oend-op < 8) return ERROR(dstSize_tooSmall); + + /* copy Match */ + { + const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12); + const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */ + size_t qutt = 12; + U64 saved[2]; + + /* check */ + if (match < base) return ERROR(corruption_detected); + if (sequence.offset > (size_t)base) return ERROR(corruption_detected); + + /* save beginning of literal sequence, in case of write overlap */ + if (overlapRisk) + { + if ((endMatch + qutt) > oend) qutt = oend-endMatch; + memcpy(saved, endMatch, qutt); + } + + if (sequence.offset < 8) + { + const int dec64 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= dec64; + } else { ZSTD_copy8(op, match); } + op += 8; match += 8; + + if (endMatch > oend-(16-MINMATCH)) + { + if (op < oend-8) + { + ZSTD_wildcopy(op, match, (oend-8) - op); + match += (oend-8) - op; + op = oend-8; + } + while (opLLTable; + U32* DTableML = dctx->MLTable; + U32* DTableOffb = dctx->OffTable; + BYTE* const base = (BYTE*) (dctx->base); + + /* Build Decoding Tables */ + errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + DTableLL, DTableML, DTableOffb, + ip, iend-ip); + if (ZSTDv01_isError(errorCode)) return errorCode; + ip += errorCode; + + /* Regen sequences */ + { + seq_t sequence; + seqState_t seqState; + + memset(&sequence, 0, sizeof(sequence)); + seqState.dumps = dumps; + seqState.dumpsEnd = dumps + dumpsLength; + seqState.prevOffset = 1; + errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip); + if (FSE_isError(errorCode)) return ERROR(corruption_detected); + FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); + FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); + FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); + + for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; ) + { + size_t oneSeqSize; + nbSeq--; + ZSTD_decodeSequence(&sequence, &seqState); + oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend); + if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + + /* check if reached exact end */ + if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */ + if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */ + + /* last literal segment */ + { + size_t lastLLSize = litEnd - litPtr; + if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); + if (op != litPtr) memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + + +static size_t ZSTD_decompressBlock( + void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + /* blockType == blockCompressed, srcSize is trusted */ + const BYTE* ip = (const BYTE*)src; + const BYTE* litPtr = NULL; + size_t litSize = 0; + size_t errorCode; + + /* Decode literals sub-block */ + errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize); + if (ZSTDv01_isError(errorCode)) return errorCode; + ip += errorCode; + srcSize -= errorCode; + + return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize); +} + + +size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, 
size_t maxDstSize, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* iend = ip + srcSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + maxDstSize; + size_t remainingSize = srcSize; + U32 magicNumber; + size_t errorCode=0; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties); + if (ZSTDv01_isError(blockSize)) return blockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (blockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize); + break; + case bt_raw : + errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); + break; + case bt_rle : + return ERROR(GENERIC); /* not yet supported */ + break; + case bt_end : + /* end of frame */ + if (remainingSize) return ERROR(srcSize_wrong); + break; + default: + return ERROR(GENERIC); + } + if (blockSize == 0) break; /* bt_end */ + + if (ZSTDv01_isError(errorCode)) return errorCode; + op += errorCode; + ip += blockSize; + remainingSize -= blockSize; + } + + return op-ostart; +} + +size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + dctx_t ctx; + ctx.base = dst; + return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); +} + +size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + size_t remainingSize = srcSize; + U32 magicNumber; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTDv01_isError(blockSize)) return blockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (blockSize > remainingSize) return ERROR(srcSize_wrong); + + if (blockSize == 0) break; /* bt_end */ + + ip += blockSize; + remainingSize -= blockSize; + } + + return ip - (const BYTE*)src; +} + +/******************************* +* Streaming Decompression API +*******************************/ + +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize; + dctx->phase = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + return 0; +} + +ZSTDv01_Dctx* ZSTDv01_createDCtx(void) +{ + ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); + if (dctx==NULL) return NULL; + ZSTDv01_resetDCtx(dctx); + return dctx; +} + +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) +{ + free(dctx); + return 0; +} + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx) +{ + return ((dctx_t*)dctx)->expected; +} + +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + dctx_t* ctx = (dctx_t*)dctx; + + /* Sanity check */ + if 
(srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)   /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = ZSTD_readBE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
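+/* Illustrative note (not part of upstream zstd_v01.c): the intended driver
+ * loop for the streaming entry points above, with error handling trimmed and
+ * the byte-pointer cursors `src`, `dst` and `dstCapacity` assumed to be
+ * provided by the caller:
+ *
+ *     ZSTDv01_Dctx* dctx = ZSTDv01_createDCtx();
+ *     while (1)
+ *     {
+ *         size_t need = ZSTDv01_nextSrcSizeToDecompress(dctx);
+ *         if (need == 0) break;   // frame fully decoded (bt_end reached)
+ *         size_t got = ZSTDv01_decompressContinue(dctx, dst, dstCapacity,
+ *                                                 src, need);  // 0 = header
+ *         if (ZSTDv01_isError(got)) break;
+ *         dst += got; dstCapacity -= got; src += need;
+ *     }
+ *     ZSTDv01_freeDCtx(dctx);
+ */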
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.h b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.h
new file mode 100644
index 0000000..42f0897
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v01.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V01_H_28739879432
+#define ZSTD_V01_H_28739879432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_findFrameCompressedSize() : get the source length of a ZSTD frame compliant with v0.1.x format
+    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    return : the number of bytes that would be read to decompress this frame
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
+
+/**
+ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
+*/
+unsigned ZSTDv01_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber   0xFD2FB51E   /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD   /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V01_H_28739879432 */
diff --git a/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v02.c b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v02.c
new file mode 100644
index 0000000..8bc0ece
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.3.4/legacy/zstd_v02.c
@@ -0,0 +1,3483 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v02.h"
+#include "error_private.h"
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#  include <stdlib.h>   /* _byteswap_ulong */
+#  include <intrin.h>   /* _byteswap_* */
+#endif
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define MEM_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef   signed short      S16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`.
Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif // MEM_FORCE_MEMORY_ACCESS + + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) + { + MEM_write16(memPtr, val); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ + + +/* ****************************************************************** + bitstream + Part of NewGen Entropy library + header file (to include) + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which highly benefit from being inlined. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + + +/********************************************** +* bitStream decompression API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/****************************************** +* unsafe API +******************************************/ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/**************************************************************** +* Helper functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + + +/********************************************************** +* bitStream decoding +**********************************************************/ + +/*!BIT_initDStream +* Initialize a BIT_DStream_t. 
+* @bitD : a pointer to an already allocated BIT_DStream_t structure +* @srcBuffer must point at the beginning of a bitStream +* @srcSize must be the exact size of the bitStream +* @result : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + if (srcSize >= sizeof(size_t)) /* normal case */ + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +/*! 
BIT_lookBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*!BIT_readBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + +/*! BIT_endOfDStream +* @return Tells if DStream has reached its exact end +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#elif defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/****************************************** +* Error Management +******************************************/ +#define PREFIX(name) ZSTD_error_##name + +#define ERROR(name) (size_t)-PREFIX(name) + +#define ERROR_LIST(ITEM) \ + ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ + ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ + ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ + ITEM(PREFIX(maxCode)) + +#define ERROR_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ + +#define ERROR_CONVERTTOSTRING(STRING) #STRING, +#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) +static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) }; + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + static const char* codeError = "Unspecified error code"; + if (ERR_isError(code)) return ERR_strings[-(int)(code)]; + return codeError; +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/* +Constructor and Destructor of type FSE_CTable + Note that its size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +typedef unsigned FSE_DTable; /* don't allocate that. 
It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
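+/* Illustrative note (not part of upstream zstd_v02.c): how the inline
+ * primitives above combine into a minimal single-state FSE decoding loop.
+ * `dt`, `cSrc`, `cSrcSize`, `op` and `oend` are assumed to be prepared by
+ * the caller; FSE_decompress_usingDTable_generic() further below is the
+ * real, two-state interleaved version of the same control flow:
+ *
+ *     BIT_DStream_t bitD;
+ *     FSE_DState_t  state;
+ *     size_t r = BIT_initDStream(&bitD, cSrc, cSrcSize);
+ *     if (ERR_isError(r)) return r;
+ *     FSE_initDState(&state, &bitD, dt);
+ *     while ( (BIT_reloadDStream(&bitD) <= BIT_DStream_completed)
+ *          && (op < oend) )
+ *         *op++ = FSE_decodeSymbol(&state, &bitD);
+ */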
+ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined into this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure risk of future changes.
+ */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Streaming functions +***************************************/ + +typedef struct ZSTD_DCtx_s ZSTD_DCtx; + +/* + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTD_magicNumber 0xFD2FB522 /* v0.2 (current)*/ + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + FSE : Finite State Entropy coder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +#ifndef FSE_COMMONDEFS_ONLY + +/**************************************************************** +* Tuning parameters +****************************************************************/ +/* MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#define FSE_MAX_MEMORY_USAGE 14 +#define FSE_DEFAULT_MEMORY_USAGE 13 + +/* FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
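+/* Illustrative note (not part of upstream zstd_v02.c): for the power-of-two
+ * table sizes used here (FSE_MIN_TABLELOG of 5 upward), FSE_tableStep()
+ * always returns an odd value (e.g. tableSize = 32 -> 16 + 4 + 3 = 23), so
+ * the step is coprime with tableSize and the update
+ * `position = (position + step) & tableMask` in FSE_buildDTable() below
+ * visits every cell exactly once before returning to 0. That is what the
+ * `if (position!=0) return ERROR(GENERIC)` check after the spreading loop
+ * relies on to detect a corrupted normalizedCounter.
+ */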
+
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* ptr = dt+1;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+    FSE_DTableHeader DTableH;
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
+    return 0;
+}
+
+
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+    return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)   /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)   /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)   /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ?
*/ + if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + return op-ostart; + + if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */ + + return ERROR(corruption_detected); +} + + +static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, sizeof(DTableH)); + + /* select fast mode (static) */ + if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + size_t errorCode; + + if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ + + /* normal FSE decoding mode */ + errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ + ip += errorCode; + cSrcSize -= errorCode; + + errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + + /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); +} + + + +#endif /* FSE_COMMONDEFS_ONLY */ +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +/* inline is defined */ +#elif defined(_MSC_VER) +# define inline __inline +#else +# define inline /* disable inline */ +#endif + + +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ + +/**************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/****************************************** +* Helper functions +******************************************/ +static unsigned HUF_isError(size_t code) { return ERR_isError(code); } + +#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */ +#define HUF_MAX_SYMBOL_VALUE 255 +#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) +# error "HUF_MAX_TABLELOG is too large !" +#endif + + + +/********************************************************* +* Huff0 : Huffman block decompression +*********************************************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ + +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; + +/*! HUF_readStats + Read compact Huffman tree, saved by HUF_writeCTable + @huffWeight : destination buffer + @return : size read from `src` +*/ +static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + U32 tableLog; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + U32 n; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... 
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);   /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)ptr;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
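+    /* DTable[0] holds the table log; decode entries start at DTable+1 (dt above) */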
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
+    const BYTE c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;   /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
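+        /* only 3 stream sizes are stored; the 4th is implied as the remainder
+           of cSrcSize after the 6-byte jump table and the first 3 streams */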
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits = (BYTE)(consumed);
+        DElt.length = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
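+        /* advance the rank cursor so the next symbol of this weight lands after the cells just filled */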
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
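+    /* read the weight table shared with the single-symbol decoder; rankStats[w] counts the symbols of weight w */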
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }   /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+        }
+    }
+    return 1;
+}
+
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7))
+    {
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+
+
+static size_t HUF_decompress4X4_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U32* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;   /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
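+            /* note : the _1 and _2 variants guard the extra decodes; _2 runs only on
+               64-bit targets, whose larger bit container needs no reload here */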
+            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize;
+    cSrcSize -= hSize;
+
+    return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
+
+/**********************************/
+/* quad-symbol decoding           */
+/**********************************/
+typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
+typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
+
+/* recursive, up to level 3; may benefit from