Import sentencepiece_0.1.99.orig.tar.gz
author Kentaro Hayashi <kenhys@xdump.org>
Sat, 17 Jun 2023 15:04:54 +0000 (16:04 +0100)
committer Kentaro Hayashi <kenhys@xdump.org>
Sat, 17 Jun 2023 15:04:54 +0000 (16:04 +0100)
commit 67b26e9d54150807609ed630f7000c34a1c09fe1
tree 98e558617e7c026f6802b64560226a70e8ce4445
Import sentencepiece_0.1.99.orig.tar.gz

[dgit import orig sentencepiece_0.1.99.orig.tar.gz]
239 files changed:
.github/workflows/cifuzz.yml [new file with mode: 0644]
.github/workflows/cmake.yml [new file with mode: 0644]
.github/workflows/wheel.yml [new file with mode: 0644]
.gitignore [new file with mode: 0644]
CMakeLists.txt [new file with mode: 0644]
CONTRIBUTING.md [new file with mode: 0644]
LICENSE [new file with mode: 0644]
README.md [new file with mode: 0644]
VERSION.txt [new file with mode: 0644]
cmake/ios.toolchain.cmake [new file with mode: 0644]
config.h.in [new file with mode: 0644]
data/Scripts.txt [new file with mode: 0644]
data/botchan.txt [new file with mode: 0644]
data/extract_headers.pl [new file with mode: 0755]
data/gen_spec_parser.pl [new file with mode: 0755]
data/gen_unicode_scripts_code.pl [new file with mode: 0755]
data/ids_denorm.tsv [new file with mode: 0644]
data/ids_norm.tsv [new file with mode: 0644]
data/nfkc.tsv [new file with mode: 0644]
data/nfkc_cf.tsv [new file with mode: 0644]
data/nfkd.tsv [new file with mode: 0644]
data/nmt_nfkc.tsv [new file with mode: 0644]
data/nmt_nfkc_cf.tsv [new file with mode: 0644]
data/wagahaiwa_nekodearu.txt [new file with mode: 0644]
doc/api.md [new file with mode: 0644]
doc/experiments.md [new file with mode: 0644]
doc/normalization.md [new file with mode: 0644]
doc/options.md [new file with mode: 0644]
doc/special_symbols.md [new file with mode: 0644]
python/.gitignore [new file with mode: 0644]
python/MANIFEST.in [new file with mode: 0644]
python/README.md [new file with mode: 0644]
python/add_new_vocab.ipynb [new file with mode: 0644]
python/build_bundled.sh [new file with mode: 0755]
python/build_sdist.sh [new file with mode: 0755]
python/sentencepiece_python_module_example.ipynb [new file with mode: 0644]
python/setup.cfg [new file with mode: 0644]
python/setup.py [new file with mode: 0755]
python/src/sentencepiece/__init__.py [new file with mode: 0644]
python/src/sentencepiece/_version.py [new file with mode: 0644]
python/src/sentencepiece/sentencepiece.i [new file with mode: 0644]
python/src/sentencepiece/sentencepiece_model_pb2.py [new file with mode: 0644]
python/src/sentencepiece/sentencepiece_pb2.py [new file with mode: 0644]
python/src/sentencepiece/sentencepiece_wrap.cxx [new file with mode: 0644]
python/test/__init__.py [new file with mode: 0644]
python/test/botchan.txt [new symlink]
python/test/sentencepiece_test.py [new file with mode: 0755]
python/test/test_ja_model.model [new file with mode: 0644]
python/test/test_model.model [new file with mode: 0644]
sentencepiece.pc.in [new file with mode: 0644]
src/CMakeLists.txt [new file with mode: 0644]
src/bpe_model.cc [new file with mode: 0644]
src/bpe_model.h [new file with mode: 0644]
src/bpe_model_test.cc [new file with mode: 0644]
src/bpe_model_trainer.cc [new file with mode: 0644]
src/bpe_model_trainer.h [new file with mode: 0644]
src/bpe_model_trainer_test.cc [new file with mode: 0644]
src/builder.cc [new file with mode: 0644]
src/builder.h [new file with mode: 0644]
src/builder_test.cc [new file with mode: 0644]
src/builtin_pb/sentencepiece.pb.cc [new file with mode: 0644]
src/builtin_pb/sentencepiece.pb.h [new file with mode: 0644]
src/builtin_pb/sentencepiece_model.pb.cc [new file with mode: 0644]
src/builtin_pb/sentencepiece_model.pb.h [new file with mode: 0644]
src/char_model.cc [new file with mode: 0644]
src/char_model.h [new file with mode: 0644]
src/char_model_test.cc [new file with mode: 0644]
src/char_model_trainer.cc [new file with mode: 0644]
src/char_model_trainer.h [new file with mode: 0644]
src/char_model_trainer_test.cc [new file with mode: 0644]
src/common.h [new file with mode: 0644]
src/compile_charsmap_main.cc [new file with mode: 0644]
src/error.cc [new file with mode: 0644]
src/filesystem.cc [new file with mode: 0644]
src/filesystem.h [new file with mode: 0644]
src/filesystem_test.cc [new file with mode: 0644]
src/freelist.h [new file with mode: 0644]
src/freelist_test.cc [new file with mode: 0644]
src/init.h [new file with mode: 0644]
src/init_test.cc [new file with mode: 0644]
src/model_factory.cc [new file with mode: 0644]
src/model_factory.h [new file with mode: 0644]
src/model_factory_test.cc [new file with mode: 0644]
src/model_interface.cc [new file with mode: 0644]
src/model_interface.h [new file with mode: 0644]
src/model_interface_test.cc [new file with mode: 0644]
src/normalization_rule.h [new file with mode: 0644]
src/normalizer.cc [new file with mode: 0644]
src/normalizer.h [new file with mode: 0644]
src/normalizer_test.cc [new file with mode: 0644]
src/pretokenizer_for_training.cc [new file with mode: 0644]
src/pretokenizer_for_training.h [new file with mode: 0644]
src/pretokenizer_for_training_test.cc [new file with mode: 0644]
src/sentencepiece.proto [new file with mode: 0644]
src/sentencepiece_model.proto [new file with mode: 0644]
src/sentencepiece_processor.cc [new file with mode: 0644]
src/sentencepiece_processor.h [new file with mode: 0644]
src/sentencepiece_processor_test.cc [new file with mode: 0644]
src/sentencepiece_trainer.cc [new file with mode: 0644]
src/sentencepiece_trainer.h [new file with mode: 0644]
src/sentencepiece_trainer_test.cc [new file with mode: 0644]
src/spec_parser.h [new file with mode: 0644]
src/spm_decode_main.cc [new file with mode: 0644]
src/spm_encode_main.cc [new file with mode: 0644]
src/spm_export_vocab_main.cc [new file with mode: 0644]
src/spm_normalize_main.cc [new file with mode: 0644]
src/spm_train_main.cc [new file with mode: 0644]
src/test_main.cc [new file with mode: 0644]
src/testharness.cc [new file with mode: 0644]
src/testharness.h [new file with mode: 0644]
src/trainer_factory.cc [new file with mode: 0644]
src/trainer_factory.h [new file with mode: 0644]
src/trainer_factory_test.cc [new file with mode: 0644]
src/trainer_interface.cc [new file with mode: 0644]
src/trainer_interface.h [new file with mode: 0644]
src/trainer_interface_test.cc [new file with mode: 0644]
src/unicode_script.cc [new file with mode: 0644]
src/unicode_script.h [new file with mode: 0644]
src/unicode_script_map.h [new file with mode: 0644]
src/unicode_script_test.cc [new file with mode: 0644]
src/unigram_model.cc [new file with mode: 0644]
src/unigram_model.h [new file with mode: 0644]
src/unigram_model_test.cc [new file with mode: 0644]
src/unigram_model_trainer.cc [new file with mode: 0644]
src/unigram_model_trainer.h [new file with mode: 0644]
src/unigram_model_trainer_test.cc [new file with mode: 0644]
src/util.cc [new file with mode: 0644]
src/util.h [new file with mode: 0644]
src/util_test.cc [new file with mode: 0644]
src/word_model.cc [new file with mode: 0644]
src/word_model.h [new file with mode: 0644]
src/word_model_test.cc [new file with mode: 0644]
src/word_model_trainer.cc [new file with mode: 0644]
src/word_model_trainer.h [new file with mode: 0644]
src/word_model_trainer_test.cc [new file with mode: 0644]
third_party/CMakeLists.txt [new file with mode: 0644]
third_party/absl/LICENSE [new file with mode: 0644]
third_party/absl/container/flat_hash_map.h [new file with mode: 0644]
third_party/absl/container/flat_hash_set.h [new file with mode: 0644]
third_party/absl/flags/flag.cc [new file with mode: 0644]
third_party/absl/flags/flag.h [new file with mode: 0644]
third_party/absl/flags/parse.h [new file with mode: 0644]
third_party/absl/memory/memory.h [new file with mode: 0644]
third_party/absl/random/distributions.h [new file with mode: 0644]
third_party/absl/random/random.h [new file with mode: 0644]
third_party/absl/strings/ascii.h [new file with mode: 0644]
third_party/absl/strings/match.h [new file with mode: 0644]
third_party/absl/strings/numbers.h [new file with mode: 0644]
third_party/absl/strings/str_cat.h [new file with mode: 0644]
third_party/absl/strings/str_format.h [new file with mode: 0644]
third_party/absl/strings/str_join.h [new file with mode: 0644]
third_party/absl/strings/str_replace.h [new file with mode: 0644]
third_party/absl/strings/str_split.h [new file with mode: 0644]
third_party/absl/strings/string_view.h [new file with mode: 0644]
third_party/absl/strings/strip.h [new file with mode: 0644]
third_party/darts_clone/LICENSE [new file with mode: 0644]
third_party/darts_clone/darts.h [new file with mode: 0644]
third_party/esaxx/LICENSE [new file with mode: 0644]
third_party/esaxx/esa.hxx [new file with mode: 0644]
third_party/esaxx/sais.hxx [new file with mode: 0644]
third_party/protobuf-lite/LICENSE [new file with mode: 0644]
third_party/protobuf-lite/arena.cc [new file with mode: 0644]
third_party/protobuf-lite/arenastring.cc [new file with mode: 0644]
third_party/protobuf-lite/bytestream.cc [new file with mode: 0644]
third_party/protobuf-lite/coded_stream.cc [new file with mode: 0644]
third_party/protobuf-lite/common.cc [new file with mode: 0644]
third_party/protobuf-lite/extension_set.cc [new file with mode: 0644]
third_party/protobuf-lite/generated_enum_util.cc [new file with mode: 0644]
third_party/protobuf-lite/generated_message_table_driven_lite.cc [new file with mode: 0644]
third_party/protobuf-lite/generated_message_util.cc [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/any.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/arena.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/arena_impl.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/arenastring.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/descriptor.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/extension_set.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/extension_set_inl.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/generated_enum_reflection.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/generated_enum_util.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/generated_message_table_driven.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/generated_message_table_driven_lite.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/generated_message_util.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/has_bits.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/implicit_weak_message.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/io/coded_stream.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/io/io_win32.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/io/zero_copy_stream.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl_lite.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/map.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/map_entry_lite.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/map_field_lite.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/map_type_handler.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/message_lite.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/metadata_lite.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/parse_context.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/port.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/port_def.inc [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/port_undef.inc [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/repeated_field.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/bytestream.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/callback.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/casts.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/common.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/hash.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/int128.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/logging.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/macros.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/map_util.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/mutex.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/once.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/platform_macros.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/port.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/status.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/statusor.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/stl_util.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/stringpiece.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/stringprintf.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/strutil.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/stubs/time.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/unknown_field_set.h [new file with mode: 0644]
third_party/protobuf-lite/google/protobuf/wire_format_lite.h [new file with mode: 0644]
third_party/protobuf-lite/implicit_weak_message.cc [new file with mode: 0644]
third_party/protobuf-lite/int128.cc [new file with mode: 0644]
third_party/protobuf-lite/io_win32.cc [new file with mode: 0644]
third_party/protobuf-lite/message_lite.cc [new file with mode: 0644]
third_party/protobuf-lite/parse_context.cc [new file with mode: 0644]
third_party/protobuf-lite/repeated_field.cc [new file with mode: 0644]
third_party/protobuf-lite/status.cc [new file with mode: 0644]
third_party/protobuf-lite/statusor.cc [new file with mode: 0644]
third_party/protobuf-lite/stringpiece.cc [new file with mode: 0644]
third_party/protobuf-lite/stringprintf.cc [new file with mode: 0644]
third_party/protobuf-lite/structurally_valid.cc [new file with mode: 0644]
third_party/protobuf-lite/strutil.cc [new file with mode: 0644]
third_party/protobuf-lite/time.cc [new file with mode: 0644]
third_party/protobuf-lite/wire_format_lite.cc [new file with mode: 0644]
third_party/protobuf-lite/zero_copy_stream.cc [new file with mode: 0644]
third_party/protobuf-lite/zero_copy_stream_impl.cc [new file with mode: 0644]
third_party/protobuf-lite/zero_copy_stream_impl_lite.cc [new file with mode: 0644]