From: Kentaro Hayashi Date: Sat, 19 Aug 2023 08:14:56 +0000 (+0100) Subject: sentencepiece (0.1.99-4) unstable; urgency=medium X-Git-Tag: archive/raspbian/0.2.0-1+rpi1^2^2~5 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=23d4d1a98e8358085940f3eab8b47c237ab99853;p=sentencepiece.git sentencepiece (0.1.99-4) unstable; urgency=medium * debian/clean - Fix FTBFS (double build) (Closes: #1047552) [dgit import unpatched sentencepiece 0.1.99-4] --- 23d4d1a98e8358085940f3eab8b47c237ab99853 diff --cc debian/README.Debian index 0000000,0000000..3aa4ad1 new file mode 100644 --- /dev/null +++ b/debian/README.Debian @@@ -1,0 -1,0 +1,48 @@@ ++# sentencepiece for Debian ++ ++The record of sentencepiece package specific information. ++ ++## trixie or later ++ ++Removed the already-applied upstream patches which were introduced in 0.1.97-3. ++ ++## bookworm ++ ++The upstream of sentencepiece 0.1.97 was initially released around June 6, 2022, ++but it was a pre-release version. ++The official release version was shipped on Aug 7, 2022. ++ ++Accidentally, the pre-release version was packaged as 0.1.97-1. ++Thus, some commits were not included in 0.1.97-1. ++ ++To fix up this issue, commits since 5e5adf2f851a1514ccc435aae11ee830c438321b ++were applied as the following patch files. ++ ++See https://github.com/google/sentencepiece/issues/794 for details.
++ ++0001-update-python-wrapper.patch ++0002-remove-debug-symbols-from-wheel-package.patch ++0003-allow-tab-character-to-be-used-in-user_defined_symbo.patch ++0004-add-test-to-use-tab-as-user-defined-symbols.patch ++0005-Uses-C-17-by-default.patch ++0006-Uses-std-atomic-to-define-global-variable.patch ++0007-Fix-a-typo.patch ++0008-Uses-absl-string_view-as-much-as-possible.patch ++0009-Fixed-build-break.patch ++0010-Added-ImmutableSentencePiece-class.patch ++0011-add-verbose-option.patch ++0012-Supports-ImmutableSentencePieceText-from-python-modu.patch ++0013-Adds-more-unittests.patch ++0014-Adds-SWIGPYTHON-flag.patch ++0015-remove-unused-ifdef-SWIG-macro.patch ++0016-Fixed-test-failure.patch ++0017-Uses-property-in-immutable-proto.patch ++0018-automatically-detect-the-number-of-CPUs-in-batch-pro.patch ++0019-support-slice-in-pieces-nbests-objects.patch ++0020-Updated-the-document.patch ++0021-Fixed-errors-in-example-notebook.patch ++0022-Fix-dead-links.patch ++0023-added-ShutdownLibrary-function-to-uninitialize-globa.patch ++0024-Fixed-the-issue-of-concatinating-paths-for-pkg-confi.patch ++ ++ -- kenhys , Sat, 17 Jun 2023 23:16:19 +0900 diff --cc debian/changelog index 0000000,0000000..d2ab754 new file mode 100644 --- /dev/null +++ b/debian/changelog @@@ -1,0 -1,0 +1,221 @@@ ++sentencepiece (0.1.99-4) unstable; urgency=medium ++ ++ * debian/clean ++ - Fix FTBFS (double build) (Closes: #1047552) ++ ++ -- Kentaro Hayashi Sat, 19 Aug 2023 17:14:56 +0900 ++ ++sentencepiece (0.1.99-3) unstable; urgency=medium ++ ++ * debian/tests/control ++ - Fix regression (preventing migration) about ++ python module's autopkgtest. ++ ++ -- Kentaro Hayashi Wed, 21 Jun 2023 14:44:27 +0900 ++ ++sentencepiece (0.1.99-2) unstable; urgency=medium ++ ++ * debian/patches/fix-ftbfs-big-endian.patch ++ - Add patch to fix FTBFS on big endian platform. 
++ * debian/tests/control ++ - Fix W: illegal-runtime-test-name warning ++ ++ -- Kentaro Hayashi Tue, 20 Jun 2023 19:28:50 +0900 ++ ++sentencepiece (0.1.99-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.99 ++ * debian/control ++ - Bump Standards-Version to 4.6.2. No other changes are required. ++ * debian/patches/disable-static-library.patch ++ debian/patches/support-python-module-in-place.patch ++ - Refresh patch files for 0.1.99 ++ * debian/patches/*.patch ++ - Drop deprecated patch files which was already applied in upstream. ++ * debian/README.Debian ++ - Update explanation of debian/patches. ++ ++ -- Kentaro Hayashi Sun, 18 Jun 2023 00:04:54 +0900 ++ ++sentencepiece (0.1.97-3) unstable; urgency=medium ++ ++ * debian/patches/0001-update-python-wrapper.patch ++ debian/patches/0002-remove-debug-symbols-from-wheel-package.patch ++ debian/patches/0003-allow-tab-character-to-be-used-in-user_defined_symbo.patch ++ debian/patches/0004-add-test-to-use-tab-as-user-defined-symbols.patch ++ debian/patches/0005-Uses-C-17-by-default.patch ++ debian/patches/0006-Uses-std-atomic-to-define-global-variable.patch ++ debian/patches/0007-Fix-a-typo.patch ++ debian/patches/0008-Uses-absl-string_view-as-much-as-possible.patch ++ debian/patches/0009-Fixed-build-break.patch ++ debian/patches/0010-Added-ImmutableSentencePiece-class.patch ++ debian/patches/0011-add-verbose-option.patch ++ debian/patches/0012-Supports-ImmutableSentencePieceText-from-python-modu.patch ++ debian/patches/0013-Adds-more-unittests.patch ++ debian/patches/0014-Adds-SWIGPYTHON-flag.patch ++ debian/patches/0015-remove-unused-ifdef-SWIG-macro.patch ++ debian/patches/0016-Fixed-test-failure.patch ++ debian/patches/0017-Uses-property-in-immutable-proto.patch ++ debian/patches/0018-automatically-detect-the-number-of-CPUs-in-batch-pro.patch ++ debian/patches/0019-support-slice-in-pieces-nbests-objects.patch ++ debian/patches/0020-Updated-the-document.patch ++ 
debian/patches/0021-Fixed-errors-in-example-notebook.patch ++ debian/patches/0022-Fix-dead-links.patch ++ debian/patches/0023-added-ShutdownLibrary-function-to-uninitialize-globa.patch ++ debian/patches/0024-Fixed-the-issue-of-concatinating-paths-for-pkg-confi.patch ++ - Add missing patch files for 0.1.97. ++ * debian/README.Debian ++ - Add explanation of debian/patches. ++ ++ -- Kentaro Hayashi Mon, 21 Nov 2022 22:43:46 +0900 ++ ++sentencepiece (0.1.97-2) unstable; urgency=medium ++ ++ * Team upload ++ ++ [ Steve Langasek ] ++ * debian/patches/header-dependencies.patch: include necessary headers ++ to ensure IS_BIG_ENDIAN is defined, see #1017360. ++ ++ -- Graham Inggs Sun, 18 Sep 2022 05:30:57 +0000 ++ ++sentencepiece (0.1.97-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.97 ++ * debian/copyright ++ - Update maintainer E-mail address ++ * debian/control ++ - Bump Standards-Version to 4.6.1. No other changes are required. ++ * debian/patches/support-python-module-in-place.patch ++ - Refresh path to build python module. ++ ++ -- Kentaro Hayashi Tue, 14 Jun 2022 20:19:58 +0900 ++ ++sentencepiece (0.1.96-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.96 ++ * debian/control ++ - Bump standard-version to 4.5.1. No changes are required. ++ ++ -- Kentaro Hayashi Wed, 18 Aug 2021 20:52:46 +0900 ++ ++sentencepiece (0.1.95-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.95 ++ * debian/patches/support-python-module-in-place.patch ++ - Fix undefined symbol when importing python module (Closes: #979040) ++ ++ -- Kentaro Hayashi Thu, 11 Feb 2021 17:36:23 +0900 ++ ++sentencepiece (0.1.94-2) unstable; urgency=medium ++ ++ * Fix FTBFS on armel/mipsel (Closes: #977235) ++ ++ -- Kentaro Hayashi Wed, 16 Dec 2020 21:18:15 +0900 ++ ++sentencepiece (0.1.94-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.94 ++ * debian/patches/support-python-module-in-place.patch ++ - Refresh path to build python module. 
++ * debian/patches/fix-ftbfs-ports.patch ++ debian/patches/mutiarch-support.patch ++ - Remove needless patch because these patch was merged ++ to google/sentencepiece. ++ ++ -- Kentaro Hayashi Wed, 28 Oct 2020 21:02:07 +0900 ++ ++sentencepiece (0.1.93-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.93 ++ * debian/source/lintian-overrides ++ - Remove needless override. ++ ++ -- Kentaro Hayashi Thu, 15 Oct 2020 21:32:05 +0900 ++ ++sentencepiece (0.1.92-3) unstable; urgency=medium ++ ++ * debian/patches/fix-ftbfs-ports.patch ++ - Fix FTBFS on powerpc ++ ++ -- Kentaro Hayashi Sat, 03 Oct 2020 20:48:27 +0900 ++ ++sentencepiece (0.1.92-2) unstable; urgency=medium ++ ++ * debian/patches/0002-Change-in-order-to-build-Python-modules-in-place.patch ++ - Fix FTBFS on hurd-i386 ++ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch ++ - Fix missing dependency to atomic library (powerpc,m68k,sh4) ++ ++ -- Kentaro Hayashi Sat, 26 Sep 2020 20:27:17 +0900 ++ ++sentencepiece (0.1.92-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.92 ++ ++ -- Kentaro Hayashi Fri, 19 Jun 2020 19:38:49 +0900 ++ ++sentencepiece (0.1.91-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.91 ++ ++ -- Kentaro Hayashi Fri, 22 May 2020 15:17:42 +0900 ++ ++sentencepiece (0.1.90-3) unstable; urgency=medium ++ ++ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch ++ - Refresh patch to fix FTBFS. ++ ++ -- Kentaro Hayashi Sun, 17 May 2020 09:02:23 +0900 ++ ++sentencepiece (0.1.90-2) unstable; urgency=medium ++ ++ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch ++ - Add patch to fix FTBFS on mipsel and armel ++ ++ -- Kentaro Hayashi Sat, 16 May 2020 16:16:45 +0900 ++ ++sentencepiece (0.1.90-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.90 ++ * debian/control ++ - Update Uploaders: ++ - Bump standard-version to 4.5.0 ++ - Bump compat version to 13. 
++ * debian/source/lintian-overrides ++ - Fix false positive source-is-missing ++ * debian/patches/0003-Disable-static-library-explicitly.patch ++ - Disable to build static library ++ ++ -- Kentaro Hayashi Wed, 13 May 2020 19:09:34 +0900 ++ ++sentencepiece (0.1.84-1) unstable; urgency=medium ++ ++ * New upstream version 0.1.84 (Closes: #939860) ++ ++ [ TSUCHIYA Masatoshi ] ++ * Initial packaging tasks. ++ * Remove pipeline configurations for BitBucket. ++ ++ [ Kentaro Hayashi ] ++ * debian/gbp.conf ++ - Add basic configuration about debian-branch ++ * debian/watch ++ - Add missing watch file to detect a new release ++ * debian/control ++ - Update deprecated Priority: to optional ++ - Add Vcs-* fields ++ - Fix W: sentencepiece: description-synopsis-starts-with-article ++ - Bump standard version to 4.4.1 ++ - Update Vcs-* under science-team ++ - Bump up compatibility level ++ - Drop python2 support ++ * debian/copyright ++ - Use https:// ++ - Update copyright about third party modules ++ * debian/rules ++ - Enable hardening ++ * debian/salsa-ci.yml ++ - Add Salsa CI configuration ++ ++ -- Kentaro Hayashi Thu, 17 Oct 2019 13:33:34 +0900 diff --cc debian/clean index 0000000,0000000..3dfef3f new file mode 100644 --- /dev/null +++ b/debian/clean @@@ -1,0 -1,0 +1,2 @@@ ++python/build/ ++python/src/sentencepiece.egg-info/ diff --cc debian/control index 0000000,0000000..74023b8 new file mode 100644 --- /dev/null +++ b/debian/control @@@ -1,0 -1,0 +1,61 @@@ ++Source: sentencepiece ++Section: science ++Priority: optional ++Maintainer: Debian Science Maintainers ++Uploaders: ++ TSUCHIYA Masatoshi , ++ Kentaro Hayashi ++Build-Depends: ++ debhelper-compat (= 13), ++ protobuf-compiler, ++ libprotobuf-dev, ++ dh-python, ++ python3-all-dev, ++ quilt, ++ cmake, ++ python3-setuptools ++Standards-Version: 4.6.2 ++Homepage: https://github.com/google/sentencepiece ++Vcs-Browser: https://salsa.debian.org/science-team/sentencepiece ++Vcs-Git: 
https://salsa.debian.org/science-team/sentencepiece.git ++Rules-Requires-Root: no ++ ++Package: sentencepiece ++Architecture: any ++Depends: ${shlibs:Depends}, ${misc:Depends} ++Description: Unsupervised text tokenizer and detokenizer ++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly ++ designed for Neural Network-based text generation systems where the ++ vocabulary size is predetermined prior to the neural model training. ++ ++Package: libsentencepiece0 ++Section: libs ++Architecture: any ++Depends: ${shlibs:Depends}, ${misc:Depends} ++Description: Library files of SentencePiece ++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly ++ designed for Neural Network-based text generation systems where the ++ vocabulary size is predetermined prior to the neural model training. ++ ++Package: libsentencepiece-dev ++Section: libdevel ++Architecture: any ++Depends: libsentencepiece0 (= ${binary:Version}), ${misc:Depends} ++Description: Header files of SentencePiece ++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly ++ designed for Neural Network-based text generation systems where the ++ vocabulary size is predetermined prior to the neural model training. ++ ++Package: python3-sentencepiece ++Section: python ++Architecture: any ++Depends: ++ ${shlibs:Depends}, ++ ${misc:Depends}, ++ ${python3:Depends} ++Description: SentencePiece binding for Python3 ++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly ++ designed for Neural Network-based text generation systems where the ++ vocabulary size is predetermined prior to the neural model training. ++ . ++ python3-sentencepiece is its binding for Python3. 
diff --cc debian/copyright index 0000000,0000000..17b9239 new file mode 100644 --- /dev/null +++ b/debian/copyright @@@ -1,0 -1,0 +1,150 @@@ ++Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ ++Upstream-Name: sentencepiece ++Source: https://github.com/google/sentencepiece ++ ++Files: * ++Copyright: 2017 Taku Kudo ++License: Apache-2.0 ++ Licensed under the Apache License, Version 2.0 (the "License"); ++ you may not use this file except in compliance with the License. ++ You may obtain a copy of the License at ++ . ++ http://www.apache.org/licenses/LICENSE-2.0 ++ . ++ Unless required by applicable law or agreed to in writing, software ++ distributed under the License is distributed on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or ++ implied. See the License for the specific language governing ++ permissions and limitations under the License. ++ ++Files: debian/* ++Copyright: ++ 2016 TSUCHIYA Masatoshi ++ 2019-2022 Kentaro Hayashi ++License: GPL-2+ ++ This package is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ . ++ This package is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ . ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see ++ . ++ On Debian systems, the complete text of the GNU General ++ Public License version 2 can be found in "/usr/share/common-licenses/GPL-2". 
++ ++Files: third_party/esaxx/* ++Copyright: 2010 Daisuke Okanohara ++License: MIT ++ ++Files: third_party/darts_clone/* ++Copyright: 2008-2011, Susumu Yata ++License: BSD-3-clause ++ ++Files: third_party/protobuf-lite/* ++Copyright: 2008 Google Inc. ++License: BSD-3-clause ++ ++Files: data/Scripts.txt ++Copyright: 1991-2016 Unicode, Inc. ++License: Unicode ++ COPYRIGHT AND PERMISSION NOTICE ++ . ++ Copyright © 1991-2016 Unicode, Inc. All rights reserved. ++ Distributed under the Terms of Use in https://www.unicode.org/copyright.html. ++ . ++ Permission is hereby granted, free of charge, to any person obtaining ++ a copy of the Unicode data files and any associated documentation ++ (the "Data Files") or Unicode software and any associated documentation ++ (the "Software") to deal in the Data Files or Software ++ without restriction, including without limitation the rights to use, ++ copy, modify, merge, publish, distribute, and/or sell copies of ++ the Data Files or Software, and to permit persons to whom the Data Files ++ or Software are furnished to do so, provided that either ++ (a) this copyright and permission notice appear with all copies ++ of the Data Files or Software, or ++ (b) this copyright and permission notice appear in associated ++ Documentation. ++ . ++ THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ++ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE ++ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT OF THIRD PARTY RIGHTS. ++ IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS ++ NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL ++ DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, ++ DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER ++ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR ++ PERFORMANCE OF THE DATA FILES OR SOFTWARE. ++ . 
++ Except as contained in this notice, the name of a copyright holder ++ shall not be used in advertising or otherwise to promote the sale, ++ use or other dealings in these Data Files or Software without prior ++ written authorization of the copyright holder. ++ ++Files: data/botchan.txt ++Copyright: Kin-nosuke Natsume ++License: public-domain ++ Written by Kin-nosuke Natsume and put into the public domain. ++ It's translated by Yasotaro Morri and published by Project Gutenberg. ++ ++Files: data/wagahaiwa_nekodearu.txt ++Copyright: Kin-nosuke Natsume ++License: public-domain ++ Written by Kin-nosuke Natsume and put into the public domain. ++ It's digitized by Aozora Bunko collaborator and published by Aozora Bunko. ++ ++License: MIT ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, ++ copy, modify, merge, publish, distribute, sublicense, and/or sell ++ copies of the Software, and to permit persons to whom the ++ Software is furnished to do so, subject to the following ++ conditions: ++ . ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ . ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ OTHER DEALINGS IN THE SOFTWARE.
++ ++License: BSD-3-clause ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ . ++ - Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ - Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the ++ distribution. ++ - Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ . ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ diff --cc debian/gbp.conf index 0000000,0000000..7c93e18 new file mode 100644 --- /dev/null +++ b/debian/gbp.conf @@@ -1,0 -1,0 +1,3 @@@ ++[DEFAULT] ++debian-branch = master ++ diff --cc debian/libsentencepiece-dev.install index 0000000,0000000..b363748 new file mode 100644 --- /dev/null +++ b/debian/libsentencepiece-dev.install @@@ -1,0 -1,0 +1,3 @@@ ++usr/lib/*/lib*.so ++usr/lib/*/pkgconfig/* ++usr/include/* diff --cc debian/libsentencepiece0.install index 0000000,0000000..3ddde58 new file mode 100644 --- /dev/null +++ b/debian/libsentencepiece0.install @@@ -1,0 -1,0 +1,1 @@@ ++usr/lib/*/lib*.so.* diff --cc debian/patches/disable-static-library.patch index 0000000,0000000..203e3ad new file mode 100644 --- /dev/null +++ b/debian/patches/disable-static-library.patch @@@ -1,0 -1,0 +1,42 @@@ ++From: Kentaro Hayashi ++Date: Sat, 17 Jun 2023 22:47:25 +0900 ++Subject: Disable static library explicitly ++ ++Forwarded: not-needed ++Bug-Debian: N/A ++--- ++ src/CMakeLists.txt | 11 +---------- ++ 1 file changed, 1 insertion(+), 10 deletions(-) ++ ++diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt ++index 1c7726e..077d37d 100644 ++--- a/src/CMakeLists.txt +++++ b/src/CMakeLists.txt ++@@ -222,16 +222,10 @@ if (SPM_ENABLE_SHARED) ++ add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) ++ endif() ++ ++-add_library(sentencepiece-static STATIC ${SPM_SRCS}) ++-add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS}) ++- ++-target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS}) ++-target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS}) ++- ++ if (SPM_ENABLE_SHARED) ++ target_link_libraries(sentencepiece ${SPM_LIBS}) ++ target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece) ++- set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static) +++ set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train) ++ set_target_properties(sentencepiece sentencepiece_train 
PROPERTIES SOVERSION 0 VERSION 0.0.0) ++ set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) ++ set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) ++@@ -248,9 +242,6 @@ else() ++ set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static) ++ endif() ++ ++-set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece") ++-set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train") ++- ++ if (NOT MSVC) ++ if (SPM_COVERAGE) ++ set(CMAKE_CXX_FLAGS "-O0 -Wall -fPIC -coverage ${CMAKE_CXX_FLAGS}") diff --cc debian/patches/fix-ftbfs-big-endian.patch index 0000000,0000000..3f7db6a new file mode 100644 --- /dev/null +++ b/debian/patches/fix-ftbfs-big-endian.patch @@@ -1,0 -1,0 +1,155 @@@ ++From: Kentaro Hayashi ++Date: Tue, 20 Jun 2023 17:12:58 +0900 ++Subject: Fixes build test errors in big-endian machines ++ ++Author: Taku Kudo ++Origin: https://github.com/google/sentencepiece/commit/827591a0c552f2187aac8b8e0f999e8ff31aad81.patch ++Forwarded: not-needed ++--- ++ CMakeLists.txt | 5 +++++ ++ src/CMakeLists.txt | 2 ++ ++ src/common.h | 10 ++++++---- ++ src/normalizer.cc | 15 ++++++++------- ++ src/unigram_model_trainer_test.cc | 5 ++--- ++ 5 files changed, 23 insertions(+), 14 deletions(-) ++ ++diff --git a/CMakeLists.txt b/CMakeLists.txt ++index 1b3af04..a2f0f77 100644 ++--- a/CMakeLists.txt +++++ b/CMakeLists.txt ++@@ -33,6 +33,11 @@ option(SPM_NO_THREADLOCAL "Disable thread_local operator" OFF) ++ option(SPM_USE_BUILTIN_PROTOBUF "Use built-in protobuf" ON) ++ option(SPM_USE_EXTERNAL_ABSL "Use external abseil" OFF) ++ option(SPM_ENABLE_MSVC_MT_BUILD, "Use /MT flag in MSVC build" OFF) +++option(SPM_CROSS_SYSTEM_PROCESSOR, "Override system processor" "") +++ +++if (SPM_CROSS_SYSTEM_PROCESSOR) +++ set(CMAKE_SYSTEM_PROCESSOR ${SPM_CROSS_SYSTEM_PROCESSOR}) +++endif() ++ ++ # Disable shared build on windows ++ if(WIN32) ++diff --git a/src/CMakeLists.txt 
b/src/CMakeLists.txt ++index 077d37d..09ef57f 100644 ++--- a/src/CMakeLists.txt +++++ b/src/CMakeLists.txt ++@@ -208,6 +208,7 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR ++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "mips") OR ++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "m68k") OR ++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc") OR +++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc") OR ++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch") OR ++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "sh4")) ++ find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1) ++@@ -217,6 +218,7 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR ++ endif() ++ endif() ++ +++ ++ if (SPM_ENABLE_SHARED) ++ add_library(sentencepiece SHARED ${SPM_SRCS}) ++ add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) ++diff --git a/src/common.h b/src/common.h ++index ef5546d..b38b3f7 100644 ++--- a/src/common.h +++++ b/src/common.h ++@@ -79,10 +79,6 @@ char (&ArraySizeHelper(const T (&array)[N]))[N]; ++ #endif ++ #endif ++ ++-#ifdef IS_BIG_ENDIAN ++-inline uint32 Swap32(uint32 x) { return __builtin_bswap32(x); } ++-#endif ++- ++ namespace sentencepiece { ++ #ifdef OS_WIN ++ namespace win32 { ++@@ -90,6 +86,12 @@ std::wstring Utf8ToWide(const absl::string_view input); ++ } // namespace win32 ++ #endif ++ +++#ifdef IS_BIG_ENDIAN +++namespace util { +++inline uint32 Swap32(uint32 x) { return __builtin_bswap32(x); } +++} // namespace util +++#endif +++ ++ namespace error { ++ ++ void Abort(); ++diff --git a/src/normalizer.cc b/src/normalizer.cc ++index 2ab8084..53e43c4 100644 ++--- a/src/normalizer.cc +++++ b/src/normalizer.cc ++@@ -260,14 +260,14 @@ std::string Normalizer::EncodePrecompiledCharsMap( ++ std::string blob; ++ blob.append(string_util::EncodePOD(trie_blob.size())); ++ blob.append(trie_blob.data(), trie_blob.size()); ++- blob.append(normalized.data(), normalized.size()); ++ ++ #ifdef IS_BIG_ENDIAN ++ uint32 *data = reinterpret_cast(const_cast(blob.data())); ++- for (int i = 0; i <= trie_blob.size() / 4; 
++i) ++- data[i] = util::Swap32(data[i]); +++ for (int i = 0; i < blob.size() / 4; ++i) data[i] = util::Swap32(data[i]); ++ #endif ++ +++ blob.append(normalized.data(), normalized.size()); +++ ++ return blob; ++ } ++ ++@@ -279,8 +279,7 @@ util::Status Normalizer::DecodePrecompiledCharsMap( ++ if (blob.size() <= sizeof(trie_blob_size) || ++ !string_util::DecodePOD( ++ absl::string_view(blob.data(), sizeof(trie_blob_size)), ++- &trie_blob_size) || ++- trie_blob_size >= blob.size()) { +++ &trie_blob_size)) { ++ return util::InternalError("Blob for normalization rule is broken."); ++ } ++ ++@@ -288,15 +287,17 @@ util::Status Normalizer::DecodePrecompiledCharsMap( ++ trie_blob_size = util::Swap32(trie_blob_size); ++ #endif ++ ++- if (trie_blob_size >= blob.size()) +++ if (trie_blob_size >= blob.size()) { ++ return util::InternalError("Trie data size exceeds the input blob size."); +++ } ++ ++ blob.remove_prefix(sizeof(trie_blob_size)); ++ ++ #ifdef IS_BIG_ENDIAN +++ CHECK_OR_RETURN(buffer); ++ buffer->assign(blob.data(), trie_blob_size); ++ uint32 *data = reinterpret_cast(const_cast(buffer->data())); ++- for (int i = 0; i < trie_blob_size / 4; ++i) data[i] = util::Swap32(data[i]); +++ for (int i = 0; i < buffer->size() / 4; ++i) data[i] = util::Swap32(data[i]); ++ *trie_blob = absl::string_view(buffer->data(), trie_blob_size); ++ #else ++ *trie_blob = absl::string_view(blob.data(), trie_blob_size); ++diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc ++index 9d2c526..31da90b 100644 ++--- a/src/unigram_model_trainer_test.cc +++++ b/src/unigram_model_trainer_test.cc ++@@ -106,6 +106,7 @@ TrainerResult RunTrainer(const std::vector& input, int size, ++ ++ TrainerResult res; ++ res.seed_pieces_and_probs = seed_pieces; +++ std::sort(pieces.begin(), pieces.end()); ++ res.sentence_pieces = absl::StrJoin(pieces, " "); ++ return res; ++ } ++@@ -119,10 +120,8 @@ TEST(UnigramTrainerTest, BasicTest) { ++ // Check seed pieces. 
++ EXPECT_EQ(27, res.seed_pieces_and_probs.size()); ++ ++- LOG(INFO) << "[" << res.sentence_pieces << "]"; ++- ++ // Check final pieces. ++- EXPECT_EQ("i a n y m l e apple ve O P r g t an v ▁ b A le ▁an p d h", +++ EXPECT_EQ("A O P a an apple b d e g h i l le m n p r t v ve y ▁ ▁an", ++ res.sentence_pieces); ++ } ++ diff --cc debian/patches/header-dependencies.patch index 0000000,0000000..2823de7 new file mode 100644 --- /dev/null +++ b/debian/patches/header-dependencies.patch @@@ -1,0 -1,0 +1,27 @@@ ++From: Kentaro Hayashi ++Date: Mon, 21 Nov 2022 22:17:18 +0900 ++Subject: Include necessary headers to ensure IS_BIG_ENDIAN is defined ++ ++normalizer.h uses IS_BIG_ENDIAN, which is defined in util.h. ++Include util.h here. ++ ++Author: Steve Langasek ++Last-Update: 2022-08-27 ++Forwarded: no ++Bug-Debian: https://bugs.debian.org/1017360 ++--- ++ src/normalizer.h | 1 + ++ 1 file changed, 1 insertion(+) ++ ++diff --git a/src/normalizer.h b/src/normalizer.h ++index c79813c..37fdb8a 100644 ++--- a/src/normalizer.h +++++ b/src/normalizer.h ++@@ -22,6 +22,7 @@ ++ #include ++ ++ #include "common.h" +++#include "util.h" ++ #include "sentencepiece_model.pb.h" ++ #include "sentencepiece_processor.h" ++ #include "third_party/absl/strings/string_view.h" diff --cc debian/patches/series index 0000000,0000000..6a7db4a new file mode 100644 --- /dev/null +++ b/debian/patches/series @@@ -1,0 -1,0 +1,4 @@@ ++disable-static-library.patch ++support-python-module-in-place.patch ++header-dependencies.patch ++fix-ftbfs-big-endian.patch diff --cc debian/patches/support-python-module-in-place.patch index 0000000,0000000..463bec2 new file mode 100644 --- /dev/null +++ b/debian/patches/support-python-module-in-place.patch @@@ -1,0 -1,0 +1,56 @@@ ++From: Kentaro Hayashi ++Date: Sat, 17 Jun 2023 22:39:14 +0900 ++Subject: Support to build Python module without pkg-config ++ ++--- ++ python/setup.py | 34 ++++++++++++++++++++-------------- ++ 1 file changed, 20 insertions(+), 14 deletions(-) ++ 
++diff --git a/python/setup.py b/python/setup.py ++index 5411231..631a8c4 100755 ++--- a/python/setup.py +++++ b/python/setup.py ++@@ -77,23 +77,29 @@ class build_ext(_build_ext): ++ """Override build_extension to run cmake.""" ++ ++ def build_extension(self, ext): ++- cflags, libs = get_cflags_and_libs('../build/root') ++- ++- if len(libs) == 0: ++- if is_sentencepiece_installed(): ++- cflags = cflags + run_pkg_config('cflags') ++- libs = run_pkg_config('libs') ++- else: ++- subprocess.check_call(['./build_bundled.sh', __version__]) ++- cflags, libs = get_cflags_and_libs('./build/root') +++ # cflags, libs = get_cflags_and_libs('../build/root') +++ # if len(libs) == 0: +++ # cflags, libs = get_cflags_and_libs('./bundled/root') +++ +++ # if len(libs) == 0: +++ # if is_sentencepiece_installed(): +++ # cflags = cflags + run_pkg_config('cflags') +++ # libs = run_pkg_config('libs') +++ # else: +++ # subprocess.check_call(['./build_bundled.sh', __version__]) +++ # cflags, libs = get_cflags_and_libs('./bundled/root') ++ ++ # Fix compile on some versions of Mac OSX ++ # See: https://github.com/neulab/xnmt/issues/199 ++- if sys.platform == 'darwin': ++- cflags.append('-mmacosx-version-min=10.9') ++- else: ++- cflags.append('-Wl,-strip-all') ++- libs.append('-Wl,-strip-all') +++ # if sys.platform == 'darwin': +++ # cflags.append('-mmacosx-version-min=10.9') +++ # else: +++ # cflags.append('-Wl,-strip-all') +++ # libs.append('-Wl,-strip-all') +++ cflags = ['-I../src'] +++ cmd = "dpkg-architecture -q DEB_BUILD_GNU_TYPE" +++ arch = subprocess.check_output(cmd, shell=True).decode("utf-8").strip().split()[0] +++ libs = ["-L../obj-%s/src" % arch, "-lsentencepiece", "-lsentencepiece_train"] ++ print('## cflags={}'.format(' '.join(cflags))) ++ print('## libs={}'.format(' '.join(libs))) ++ ext.extra_compile_args = cflags diff --cc debian/python3-sentencepiece.install index 0000000,0000000..0cde274 new file mode 100644 --- /dev/null +++ b/debian/python3-sentencepiece.install @@@ -1,0 
-1,0 +1,1 @@@ ++usr/lib/python3.*/ diff --cc debian/rules index 0000000,0000000..e0dcf54 new file mode 100755 --- /dev/null +++ b/debian/rules @@@ -1,0 -1,0 +1,41 @@@ ++#!/usr/bin/make -f ++# -*- makefile -*- ++# Sample debian/rules that uses debhelper. ++# This file was originally written by Joey Hess and Craig Small. ++# As a special exception, when this file is copied by dh-make into a ++# dh-make output file, you may use that output file without restriction. ++# This special exception was added by Craig Small in version 0.37 of dh-make. ++ ++# Uncomment this to turn on verbose mode. ++#export DH_VERBOSE=1 ++export DEB_BUILD_MAINT_OPTIONS = hardening=+all ++DPKG_EXPORT_BUILDFLAGS = 1 ++include /usr/share/dpkg/buildflags.mk ++ ++ifneq (,$(filter $(DEB_HOST_ARCH), armel mipsel m68k powerpc sh4)) ++ export DEB_LDFLAGS_MAINT_APPEND += -Wl,--no-as-needed -latomic -Wl,--as-needed ++endif ++ ++%: ++ dh $@ --with python3 --buildsystem=cmake ++ ++override_dh_auto_configure: ++ dh_auto_configure --buildsystem=cmake ++ dh_auto_configure --sourcedirectory=python --buildsystem=pybuild ++ ++override_dh_auto_build: ++ dh_auto_build --buildsystem=cmake ++ dh_auto_build --sourcedirectory=python --buildsystem=pybuild ++ ++override_dh_auto_install: basedir=$(shell pwd)/debian ++override_dh_auto_install: ++ dh_auto_install --buildsystem=cmake ++ dh_auto_install --sourcedirectory=python --buildsystem=pybuild ++ ++override_dh_auto_clean: ++ dh_auto_clean --buildsystem=cmake ++ -rm -rf .pybuild ++ -rm -rf .python/sentencepiece.egg-info ++ ++# Do no tests. 
++override_dh_auto_test: diff --cc debian/salsa-ci.yml index 0000000,0000000..1d8d33b new file mode 100644 --- /dev/null +++ b/debian/salsa-ci.yml @@@ -1,0 -1,0 +1,7 @@@ ++--- ++include: ++ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml ++ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml ++ ++reprotest: ++ allow_failure: true diff --cc debian/sentencepiece.docs index 0000000,0000000..8d15174 new file mode 100644 --- /dev/null +++ b/debian/sentencepiece.docs @@@ -1,0 -1,0 +1,1 @@@ ++doc/*.md diff --cc debian/sentencepiece.install index 0000000,0000000..1df36c6 new file mode 100644 --- /dev/null +++ b/debian/sentencepiece.install @@@ -1,0 -1,0 +1,1 @@@ ++usr/bin/* diff --cc debian/sentencepiece.xml index 0000000,0000000..2a81db2 new file mode 100644 --- /dev/null +++ b/debian/sentencepiece.xml @@@ -1,0 -1,0 +1,291 @@@ ++ ++.
will be generated. You may view the ++manual page with: nroff -man <package>.<section>
| less'. A typical entry ++in a Makefile or Makefile.am is: ++ ++DB2MAN = /usr/share/sgml/docbook/stylesheet/xsl/nwalsh/manpages/docbook.xsl ++XP = xsltproc -''-nonet -''-param man.charmap.use.subset "0" ++ ++manpage.1: manpage.xml ++ $(XP) $(DB2MAN) $< ++ ++The xsltproc binary is found in the xsltproc package. The XSL files are in ++docbook-xsl. A description of the parameters you can use can be found in the ++docbook-xsl-doc-* packages. Please remember that if you create the nroff ++version in one of the debian/rules file targets (such as build), you will need ++to include xsltproc and docbook-xsl in your Build-Depends control field. ++Alternatively use the xmlto command/package. That will also automatically ++pull in xsltproc and docbook-xsl. ++ ++Notes for using docbook2x: docbook2x-man does not automatically create the ++AUTHOR(S) and COPYRIGHT sections. In this case, please add them manually as ++ ... . ++ ++To disable the automatic creation of the AUTHOR(S) and COPYRIGHT sections ++read /usr/share/doc/docbook-xsl/doc/manpages/authors.html. This file can be ++found in the docbook-xsl-doc-html package. ++ ++Validation can be done using: `xmllint -''-noout -''-valid manpage.xml` ++ ++General documentation about man-pages and man-page-formatting: ++man(1), man(7), http://www.tldp.org/HOWTO/Man-Page/ ++ ++--> ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++]> ++ ++ ++ ++ &dhtitle; ++ &dhpackage; ++ ++ ++ &dhfirstname; ++ &dhsurname; ++ Wrote this manpage for the Debian system. ++
++ &dhemail; ++
++
++
++ ++ 2007 ++ &dhusername; ++ ++ ++ This manual page was written for the Debian system ++ (but may be used by others). ++ Permission is granted to copy, distribute and/or modify this ++ document under the terms of the GNU General Public License, ++ Version 2 or (at your option) any later version published by ++ the Free Software Foundation. ++ On Debian systems, the complete text of the GNU General Public ++ License can be found in ++ /usr/share/common-licenses/GPL. ++ ++
++ ++ &dhucpackage; ++ &dhsection; ++ ++ ++ &dhpackage; ++ program to do something ++ ++ ++ ++ &dhpackage; ++ ++ ++ ++ ++ ++ ++ ++ ++ this ++ ++ ++ ++ ++ ++ ++ ++ this ++ that ++ ++ ++ ++ ++ &dhpackage; ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ DESCRIPTION ++ This manual page documents briefly the ++ &dhpackage; and bar ++ commands. ++ This manual page was written for the Debian distribution ++ because the original program does not have a manual page. ++ Instead, it has documentation in the GNU ++ info ++ 1 ++ format; see below. ++ &dhpackage; is a program that... ++ ++ ++ OPTIONS ++ The program follows the usual GNU command line syntax, ++ with long options starting with two dashes (`--'). A summary of ++ options is included below. For a complete description, see the ++ ++ info ++ 1 ++ files. ++ ++ ++ ++ ++ ++ ++ Does this and that. ++ ++ ++ ++ ++ ++ ++ Show summary of options. ++ ++ ++ ++ ++ ++ ++ Show version of program. ++ ++ ++ ++ ++ ++ FILES ++ ++ ++ /etc/foo.conf ++ ++ The system-wide configuration file to control the ++ behaviour of &dhpackage;. See ++ ++ foo.conf ++ 5 ++ for further details. ++ ++ ++ ++ ${HOME}/.foo.conf ++ ++ The per-user configuration file to control the ++ behaviour of &dhpackage;. See ++ ++ foo.conf ++ 5 ++ for further details. ++ ++ ++ ++ ++ ++ ENVIRONMENT ++ ++ ++ FOO_CONF ++ ++ If used, the defined file is used as configuration ++ file (see also ). ++ ++ ++ ++ ++ ++ DIAGNOSTICS ++ The following diagnostics may be issued ++ on stderr: ++ ++ ++ Bad configuration file. Exiting. ++ ++ The configuration file seems to contain a broken configuration ++ line. Use the option, to get more info. ++ ++ ++ ++ ++ &dhpackage; provides some return codes, that can ++ be used in scripts: ++ ++ Code ++ Diagnostic ++ ++ 0 ++ Program exited successfully. ++ ++ ++ 1 ++ The configuration file seems to be broken. ++ ++ ++ ++ ++ ++ BUGS ++ The program is currently limited to only work ++ with the foobar library.
++ The upstream's BTS can be found ++ at <ulink url="http://bugzilla.foo.tld"/>. ++ ++ ++ SEE ALSO ++ ++ ++ bar ++ 1 ++ , ++ baz ++ 1 ++ , ++ foo.conf ++ 5 ++ ++ The programs are documented fully by The Rise and ++ Fall of a Fooish Bar available via the ++ info ++ 1 ++ system. ++ ++
++ diff --cc debian/source/format index 0000000,0000000..163aaf8 new file mode 100644 --- /dev/null +++ b/debian/source/format @@@ -1,0 -1,0 +1,1 @@@ ++3.0 (quilt) diff --cc debian/tests/control index 0000000,0000000..ff74c01 new file mode 100644 --- /dev/null +++ b/debian/tests/control @@@ -1,0 -1,0 +1,2 @@@ ++Tests: python spm-encode ++Depends: libsentencepiece0, sentencepiece, python3-sentencepiece diff --cc debian/tests/python index 0000000,0000000..b774114 new file mode 100755 --- /dev/null +++ b/debian/tests/python @@@ -1,0 -1,0 +1,19 @@@ ++#!/bin/sh ++ ++set -e ++ ++VERSION=$(dpkg-query --show --showformat='${Version}' sentencepiece | cut -d- -f1) ++cat <<EOS > test_module.py ++import sentencepiece as spm ++ ++print('VERSION={}'.format(spm.__version__)) ++EOS ++ ++PYVERSION=$(python3 test_module.py) ++if [ "VERSION=$VERSION" = "$PYVERSION" ]; then ++ echo "run python module" ++else ++ echo "Failed to get module version: <${PYVERSION}>" ++ exit 1 ++fi ++rm -f test_module.py diff --cc debian/tests/spm-encode index 0000000,0000000..9a2f28e new file mode 100755 --- /dev/null +++ b/debian/tests/spm-encode @@@ -1,0 -1,0 +1,26 @@@ ++#!/bin/sh ++ ++set -e ++ ++cat <<EOS > input.txt ++SentencePiece is an unsupervised text tokenizer and detokenizer mainly ++for Neural Network-based text generation systems where the vocabulary ++size is predetermined prior to the neural model ++training. SentencePiece implements subword units (e.g., ++byte-pair-encoding (BPE) [Sennrich et al.]) and unigram language model ++[Kudo.]) with the extension of direct training from raw ++sentences. SentencePiece allows us to make a purely end-to-end system ++that does not depend on language-specific pre/postprocessing. ++EOS ++ ++rm -f tiny.* ++spm_train --input=input.txt --model_prefix=tiny --vocab_size=100 --character_coverage=1.0 >/dev/null 2>&1 ++encoded=$(echo "I saw a girl with a telescope." | spm_encode --model=tiny.model) ++if [ "▁ I ▁s a w ▁a ▁ g ir l ▁w i t h ▁a ▁t el e s c o p e ."
= "${encoded}" ]; then ++ echo "run spm_encode test" ++else ++ echo "Failed to spm_encode example: <${encoded}>" ++ exit 1 ++fi ++rm -f input.txt ++rm -f tiny.* diff --cc debian/watch index 0000000,0000000..336e9c8 new file mode 100644 --- /dev/null +++ b/debian/watch @@@ -1,0 -1,0 +1,4 @@@ ++version=4 ++opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%sentencepiece-$1-Source.tar.xz%" \ ++ https://github.com/google/sentencepiece/tags \ ++ (?:.*?/)?v(\d[\d.]*)\.tar\.gz debian uupdate