--- /dev/null
+# sentencepiece for Debian
+
+The record of sentencepiece package specific information.
+
+## trixie or later
+
+Removed the upstream patches that were introduced in 0.1.97-3, because they are already applied upstream.
+
+## bookworm
+
+The upstream sentencepiece 0.1.97 was initially published around June 6, 2022,
+but that was a pre-release version.
+The official release was shipped on Aug 7, 2022.
+
+The pre-release version was accidentally packaged as 0.1.97-1.
+Thus, some commits were not included in 0.1.97-1.
+
+To fix up this issue, commits since 5e5adf2f851a1514ccc435aae11ee830c438321b
+were applied as the following patch files.
+
+See https://github.com/google/sentencepiece/issues/794 for details.
+
+0001-update-python-wrapper.patch
+0002-remove-debug-symbols-from-wheel-package.patch
+0003-allow-tab-character-to-be-used-in-user_defined_symbo.patch
+0004-add-test-to-use-tab-as-user-defined-symbols.patch
+0005-Uses-C-17-by-default.patch
+0006-Uses-std-atomic-to-define-global-variable.patch
+0007-Fix-a-typo.patch
+0008-Uses-absl-string_view-as-much-as-possible.patch
+0009-Fixed-build-break.patch
+0010-Added-ImmutableSentencePiece-class.patch
+0011-add-verbose-option.patch
+0012-Supports-ImmutableSentencePieceText-from-python-modu.patch
+0013-Adds-more-unittests.patch
+0014-Adds-SWIGPYTHON-flag.patch
+0015-remove-unused-ifdef-SWIG-macro.patch
+0016-Fixed-test-failure.patch
+0017-Uses-property-in-immutable-proto.patch
+0018-automatically-detect-the-number-of-CPUs-in-batch-pro.patch
+0019-support-slice-in-pieces-nbests-objects.patch
+0020-Updated-the-document.patch
+0021-Fixed-errors-in-example-notebook.patch
+0022-Fix-dead-links.patch
+0023-added-ShutdownLibrary-function-to-uninitialize-globa.patch
+0024-Fixed-the-issue-of-concatinating-paths-for-pkg-confi.patch
+
+ -- kenhys <kenhys@fabre.debian.net>, Sat, 17 Jun 2023 23:16:19 +0900
--- /dev/null
+sentencepiece (0.2.0-1) unstable; urgency=medium
+
+ * New upstream version 0.2.0
+ * debian/patches/fix-ftbfs-big-endian.patch
+ debian/patches/header-dependencies.patch
+    - Drop needless patches which are already fixed in upstream.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Fri, 23 Feb 2024 20:23:45 +0900
+
+sentencepiece (0.1.99-4) unstable; urgency=medium
+
+ * debian/clean
+ - Fix FTBFS (double build) (Closes: #1047552)
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 19 Aug 2023 17:14:56 +0900
+
+sentencepiece (0.1.99-3) unstable; urgency=medium
+
+ * debian/tests/control
+ - Fix regression (preventing migration) about
+ python module's autopkgtest.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 21 Jun 2023 14:44:27 +0900
+
+sentencepiece (0.1.99-2) unstable; urgency=medium
+
+ * debian/patches/fix-ftbfs-big-endian.patch
+ - Add patch to fix FTBFS on big endian platform.
+ * debian/tests/control
+ - Fix W: illegal-runtime-test-name warning
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Tue, 20 Jun 2023 19:28:50 +0900
+
+sentencepiece (0.1.99-1) unstable; urgency=medium
+
+ * New upstream version 0.1.99
+ * debian/control
+ - Bump Standards-Version to 4.6.2. No other changes are required.
+ * debian/patches/disable-static-library.patch
+ debian/patches/support-python-module-in-place.patch
+ - Refresh patch files for 0.1.99
+ * debian/patches/*.patch
+ - Drop deprecated patch files which was already applied in upstream.
+ * debian/README.Debian
+ - Update explanation of debian/patches.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Sun, 18 Jun 2023 00:04:54 +0900
+
+sentencepiece (0.1.97-3) unstable; urgency=medium
+
+ * debian/patches/0001-update-python-wrapper.patch
+ debian/patches/0002-remove-debug-symbols-from-wheel-package.patch
+ debian/patches/0003-allow-tab-character-to-be-used-in-user_defined_symbo.patch
+ debian/patches/0004-add-test-to-use-tab-as-user-defined-symbols.patch
+ debian/patches/0005-Uses-C-17-by-default.patch
+ debian/patches/0006-Uses-std-atomic-to-define-global-variable.patch
+ debian/patches/0007-Fix-a-typo.patch
+ debian/patches/0008-Uses-absl-string_view-as-much-as-possible.patch
+ debian/patches/0009-Fixed-build-break.patch
+ debian/patches/0010-Added-ImmutableSentencePiece-class.patch
+ debian/patches/0011-add-verbose-option.patch
+ debian/patches/0012-Supports-ImmutableSentencePieceText-from-python-modu.patch
+ debian/patches/0013-Adds-more-unittests.patch
+ debian/patches/0014-Adds-SWIGPYTHON-flag.patch
+ debian/patches/0015-remove-unused-ifdef-SWIG-macro.patch
+ debian/patches/0016-Fixed-test-failure.patch
+ debian/patches/0017-Uses-property-in-immutable-proto.patch
+ debian/patches/0018-automatically-detect-the-number-of-CPUs-in-batch-pro.patch
+ debian/patches/0019-support-slice-in-pieces-nbests-objects.patch
+ debian/patches/0020-Updated-the-document.patch
+ debian/patches/0021-Fixed-errors-in-example-notebook.patch
+ debian/patches/0022-Fix-dead-links.patch
+ debian/patches/0023-added-ShutdownLibrary-function-to-uninitialize-globa.patch
+ debian/patches/0024-Fixed-the-issue-of-concatinating-paths-for-pkg-confi.patch
+ - Add missing patch files for 0.1.97.
+ * debian/README.Debian
+ - Add explanation of debian/patches.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Mon, 21 Nov 2022 22:43:46 +0900
+
+sentencepiece (0.1.97-2) unstable; urgency=medium
+
+ * Team upload
+
+ [ Steve Langasek ]
+ * debian/patches/header-dependencies.patch: include necessary headers
+ to ensure IS_BIG_ENDIAN is defined, see #1017360.
+
+ -- Graham Inggs <ginggs@debian.org> Sun, 18 Sep 2022 05:30:57 +0000
+
+sentencepiece (0.1.97-1) unstable; urgency=medium
+
+ * New upstream version 0.1.97
+ * debian/copyright
+ - Update maintainer E-mail address
+ * debian/control
+ - Bump Standards-Version to 4.6.1. No other changes are required.
+ * debian/patches/support-python-module-in-place.patch
+ - Refresh path to build python module.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Tue, 14 Jun 2022 20:19:58 +0900
+
+sentencepiece (0.1.96-1) unstable; urgency=medium
+
+ * New upstream version 0.1.96
+ * debian/control
+ - Bump standard-version to 4.5.1. No changes are required.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 18 Aug 2021 20:52:46 +0900
+
+sentencepiece (0.1.95-1) unstable; urgency=medium
+
+ * New upstream version 0.1.95
+ * debian/patches/support-python-module-in-place.patch
+ - Fix undefined symbol when importing python module (Closes: #979040)
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Thu, 11 Feb 2021 17:36:23 +0900
+
+sentencepiece (0.1.94-2) unstable; urgency=medium
+
+ * Fix FTBFS on armel/mipsel (Closes: #977235)
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 16 Dec 2020 21:18:15 +0900
+
+sentencepiece (0.1.94-1) unstable; urgency=medium
+
+ * New upstream version 0.1.94
+ * debian/patches/support-python-module-in-place.patch
+ - Refresh path to build python module.
+ * debian/patches/fix-ftbfs-ports.patch
+ debian/patches/mutiarch-support.patch
+ - Remove needless patch because these patch was merged
+ to google/sentencepiece.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 28 Oct 2020 21:02:07 +0900
+
+sentencepiece (0.1.93-1) unstable; urgency=medium
+
+ * New upstream version 0.1.93
+ * debian/source/lintian-overrides
+ - Remove needless override.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Thu, 15 Oct 2020 21:32:05 +0900
+
+sentencepiece (0.1.92-3) unstable; urgency=medium
+
+ * debian/patches/fix-ftbfs-ports.patch
+ - Fix FTBFS on powerpc
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 03 Oct 2020 20:48:27 +0900
+
+sentencepiece (0.1.92-2) unstable; urgency=medium
+
+ * debian/patches/0002-Change-in-order-to-build-Python-modules-in-place.patch
+ - Fix FTBFS on hurd-i386
+ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch
+ - Fix missing dependency to atomic library (powerpc,m68k,sh4)
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 26 Sep 2020 20:27:17 +0900
+
+sentencepiece (0.1.92-1) unstable; urgency=medium
+
+ * New upstream version 0.1.92
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Fri, 19 Jun 2020 19:38:49 +0900
+
+sentencepiece (0.1.91-1) unstable; urgency=medium
+
+ * New upstream version 0.1.91
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Fri, 22 May 2020 15:17:42 +0900
+
+sentencepiece (0.1.90-3) unstable; urgency=medium
+
+ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch
+ - Refresh patch to fix FTBFS.
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Sun, 17 May 2020 09:02:23 +0900
+
+sentencepiece (0.1.90-2) unstable; urgency=medium
+
+ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch
+ - Add patch to fix FTBFS on mipsel and armel
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 16 May 2020 16:16:45 +0900
+
+sentencepiece (0.1.90-1) unstable; urgency=medium
+
+ * New upstream version 0.1.90
+ * debian/control
+ - Update Uploaders:
+ - Bump standard-version to 4.5.0
+ - Bump compat version to 13.
+ * debian/source/lintian-overrides
+ - Fix false positive source-is-missing
+ * debian/patches/0003-Disable-static-library-explicitly.patch
+ - Disable to build static library
+
+ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 13 May 2020 19:09:34 +0900
+
+sentencepiece (0.1.84-1) unstable; urgency=medium
+
+ * New upstream version 0.1.84 (Closes: #939860)
+
+ [ TSUCHIYA Masatoshi ]
+ * Initial packaging tasks.
+ * Remove pipeline configurations for BitBucket.
+
+ [ Kentaro Hayashi ]
+ * debian/gbp.conf
+ - Add basic configuration about debian-branch
+ * debian/watch
+ - Add missing watch file to detect a new release
+ * debian/control
+ - Update deprecated Priority: to optional
+ - Add Vcs-* fields
+ - Fix W: sentencepiece: description-synopsis-starts-with-article
+ - Bump standard version to 4.4.1
+ - Update Vcs-* under science-team
+ - Bump up compatibility level
+ - Drop python2 support
+ * debian/copyright
+ - Use https://
+ - Update copyright about third party modules
+ * debian/rules
+ - Enable hardening
+ * debian/salsa-ci.yml
+ - Add Salsa CI configuration
+
+ -- Kentaro Hayashi <hayashi@clear-code.com> Thu, 17 Oct 2019 13:33:34 +0900
--- /dev/null
+python/build/
+python/src/sentencepiece.egg-info/
--- /dev/null
+Source: sentencepiece
+Section: science
+Priority: optional
+Maintainer: Debian Science Maintainers <debian-science-maintainers@lists.alioth.debian.org>
+Uploaders:
+ TSUCHIYA Masatoshi <tsuchiya@namazu.org>,
+ Kentaro Hayashi <kenhys@xdump.org>
+Build-Depends:
+ debhelper-compat (= 13),
+ protobuf-compiler,
+ libprotobuf-dev,
+ dh-python,
+ python3-all-dev,
+ quilt,
+ cmake,
+ python3-setuptools
+Standards-Version: 4.6.2
+Homepage: https://github.com/google/sentencepiece
+Vcs-Browser: https://salsa.debian.org/science-team/sentencepiece
+Vcs-Git: https://salsa.debian.org/science-team/sentencepiece.git
+Rules-Requires-Root: no
+
+Package: sentencepiece
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: Unsupervised text tokenizer and detokenizer
+ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
+ designed for Neural Network-based text generation systems where the
+ vocabulary size is predetermined prior to the neural model training.
+
+Package: libsentencepiece0
+Section: libs
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: Library files of SentencePiece
+ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
+ designed for Neural Network-based text generation systems where the
+ vocabulary size is predetermined prior to the neural model training.
+
+Package: libsentencepiece-dev
+Section: libdevel
+Architecture: any
+Depends: libsentencepiece0 (= ${binary:Version}), ${misc:Depends}
+Description: Header files of SentencePiece
+ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
+ designed for Neural Network-based text generation systems where the
+ vocabulary size is predetermined prior to the neural model training.
+
+Package: python3-sentencepiece
+Section: python
+Architecture: any
+Depends:
+ ${shlibs:Depends},
+ ${misc:Depends},
+ ${python3:Depends}
+Description: SentencePiece binding for Python3
+ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
+ designed for Neural Network-based text generation systems where the
+ vocabulary size is predetermined prior to the neural model training.
+ .
+ python3-sentencepiece is its binding for Python3.
--- /dev/null
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: sentencepiece
+Source: https://github.com/google/sentencepiece
+
+Files: *
+Copyright: 2017-2024 Taku Kudo <taku@chasen.org>
+License: Apache-2.0
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied. See the License for the specific language governing
+ permissions and limitations under the License.
+
+Files: debian/*
+Copyright:
+ 2016 TSUCHIYA Masatoshi <tsuchiya@namazu.org>
+ 2019-2024 Kentaro Hayashi <kenhys@xdump.org>
+License: GPL-2+
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ .
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ .
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ .
+ On Debian systems, the complete text of the GNU General
+ Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
+
+Files: third_party/esaxx/*
+Copyright: 2010 Daisuke Okanohara
+License: MIT
+
+Files: third_party/darts_clone/*
+Copyright: 2008-2011, Susumu Yata
+License: BSD-3-clause
+
+Files: third_party/protobuf-lite/*
+Copyright: 2008 Google Inc.
+License: BSD-3-clause
+
+Files: data/Scripts.txt
+Copyright: 1991-2016 Unicode, Inc.
+License: Unicode
+ COPYRIGHT AND PERMISSION NOTICE
+ .
+ Copyright © 1991-2016 Unicode, Inc. All rights reserved.
+ Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+ .
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of the Unicode data files and any associated documentation
+ (the "Data Files") or Unicode software and any associated documentation
+ (the "Software") to deal in the Data Files or Software
+ without restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, and/or sell copies of
+ the Data Files or Software, and to permit persons to whom the Data Files
+ or Software are furnished to do so, provided that either
+ (a) this copyright and permission notice appear with all copies
+ of the Data Files or Software, or
+ (b) this copyright and permission notice appear in associated
+ Documentation.
+ .
+ THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+ NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+ DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+ .
+ Except as contained in this notice, the name of a copyright holder
+ shall not be used in advertising or otherwise to promote the sale,
+ use or other dealings in these Data Files or Software without prior
+ written authorization of the copyright holder.
+
+Files: data/botchan.txt
+Copyright: Kin-nosuke Natsume
+License: public-domain
+ Written by Kin-nosuke Natsume and put into the public domain.
+ It was translated by Yasotaro Morri and published by Project Gutenberg.
+
+Files: data/wagahaiwa_nekodearu.txt
+Copyright: Kin-nosuke Natsume
+License: public-domain
+ Written by Kin-nosuke Natsume and put into the public domain.
+ It was digitized by Aozora Bunko collaborators and published by Aozora Bunko.
+
+License: MIT
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+ .
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+ .
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
+
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ .
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+ - Neither the name of the <ORGANIZATION> nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
--- /dev/null
+[DEFAULT]
+debian-branch = master
+
--- /dev/null
+usr/lib/*/lib*.so
+usr/lib/*/pkgconfig/*
+usr/include/*
--- /dev/null
+usr/lib/*/lib*.so.*
--- /dev/null
+From: Kentaro Hayashi <kenhys@gmail.com>
+Date: Sat, 17 Jun 2023 22:47:25 +0900
+Subject: Disable static library explicitly
+
+Forwarded: not-needed
+Bug-Debian: N/A
+---
+ src/CMakeLists.txt | 11 +----------
+ 1 file changed, 1 insertion(+), 10 deletions(-)
+
+diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
+index 8d4a34f..4db4324 100644
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -230,16 +230,10 @@ if (SPM_ENABLE_SHARED)
+ endif()
+ endif()
+
+-add_library(sentencepiece-static STATIC ${SPM_SRCS})
+-add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS})
+-
+-target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS})
+-target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS})
+-
+ if (SPM_ENABLE_SHARED)
+ target_link_libraries(sentencepiece ${SPM_LIBS})
+ target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)
+- set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static)
++ set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train)
+ set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0)
+ set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
+ set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
+@@ -256,9 +250,6 @@ else()
+ set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
+ endif()
+
+-set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece")
+-set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train")
+-
+ if (NOT MSVC)
+ if (SPM_COVERAGE)
+ set(CMAKE_CXX_FLAGS "-O0 -Wall -fPIC -coverage ${CMAKE_CXX_FLAGS}")
--- /dev/null
+disable-static-library.patch
+support-python-module-in-place.patch
--- /dev/null
+From: Kentaro Hayashi <kenhys@gmail.com>
+Date: Fri, 23 Feb 2024 20:27:25 +0900
+Subject: Support to build Python module without pkg-config
+
+---
+ python/setup.py | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
+diff --git a/python/setup.py b/python/setup.py
+index d600321..43bd868 100755
+--- a/python/setup.py
++++ b/python/setup.py
+@@ -77,15 +77,21 @@ class build_ext(_build_ext):
+ """Override build_extension to run cmake."""
+
+ def build_extension(self, ext):
+- cflags, libs = get_cflags_and_libs('../build/root')
+-
+- if len(libs) == 0:
+- if is_sentencepiece_installed():
+- cflags = cflags + run_pkg_config('cflags')
+- libs = run_pkg_config('libs')
+- else:
+- subprocess.check_call(['./build_bundled.sh', __version__])
+- cflags, libs = get_cflags_and_libs('./build/root')
++ # cflags, libs = get_cflags_and_libs('../build/root')
++ # if len(libs) == 0:
++ # cflags, libs = get_cflags_and_libs('./bundled/root')
++
++ # if len(libs) == 0:
++ # if is_sentencepiece_installed():
++ # cflags = cflags + run_pkg_config('cflags')
++ # libs = run_pkg_config('libs')
++ # else:
++ # subprocess.check_call(['./build_bundled.sh', __version__])
++ # cflags, libs = get_cflags_and_libs('./bundled/root')
++ cflags = ['-I../src']
++ cmd = "dpkg-architecture -q DEB_BUILD_GNU_TYPE"
++ arch = subprocess.check_output(cmd, shell=True).decode("utf-8").strip().split()[0]
++ libs = ["-L../obj-%s/src" % arch, "-lsentencepiece", "-lsentencepiece_train"]
+
+ # Fix compile on some versions of Mac OSX
+ # See: https://github.com/neulab/xnmt/issues/199
+@@ -96,6 +102,7 @@ class build_ext(_build_ext):
+ libs.append('-Wl,-strip-all')
+ if sys.platform == 'linux':
+ libs.append('-Wl,-Bsymbolic')
++
+ print('## cflags={}'.format(' '.join(cflags)))
+ print('## libs={}'.format(' '.join(libs)))
+ ext.extra_compile_args = cflags
--- /dev/null
+usr/lib/python3.*/
--- /dev/null
+#!/usr/bin/make -f
+# -*- makefile -*-
+# Sample debian/rules that uses debhelper.
+# This file was originally written by Joey Hess and Craig Small.
+# As a special exception, when this file is copied by dh-make into a
+# dh-make output file, you may use that output file without restriction.
+# This special exception was added by Craig Small in version 0.37 of dh-make.
+#
+# Build driver for sentencepiece. Each dh_auto_* step is overridden so that
+# it runs twice: first with the cmake build system on the top-level source,
+# then with pybuild on the python/ subdirectory.
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+# Enable every hardening flag and export the dpkg-buildflags results
+# (CFLAGS, LDFLAGS, ...) into the environment of the build tools.
+export DEB_BUILD_MAINT_OPTIONS = hardening=+all
+DPKG_EXPORT_BUILDFLAGS = 1
+# architecture.mk defines DEB_HOST_ARCH, so the conditional below also works
+# when this file is run directly instead of through dpkg-buildpackage.
+include /usr/share/dpkg/architecture.mk
+include /usr/share/dpkg/buildflags.mk
+
+# On these architectures libatomic is linked unconditionally:
+# --no-as-needed keeps the linker from dropping -latomic, and --as-needed
+# restores the default for the libraries that follow.
+ifneq (,$(filter $(DEB_HOST_ARCH), armel mipsel m68k powerpc sh4))
+  export DEB_LDFLAGS_MAINT_APPEND += -Wl,--no-as-needed -latomic -Wl,--as-needed
+endif
+
+# Default sequence: debhelper with the python3 addon, cmake as build system.
+%:
+	dh $@ --with python3 --buildsystem=cmake
+
+# Configure the cmake tree first, then the Python module under python/.
+override_dh_auto_configure:
+	dh_auto_configure --buildsystem=cmake
+	dh_auto_configure --sourcedirectory=python --buildsystem=pybuild
+
+# Same order for the build step: cmake first, pybuild second.
+override_dh_auto_build:
+	dh_auto_build --buildsystem=cmake
+	dh_auto_build --sourcedirectory=python --buildsystem=pybuild
+
+# Install the cmake outputs and then the Python module.
+override_dh_auto_install:
+	dh_auto_install --buildsystem=cmake
+	dh_auto_install --sourcedirectory=python --buildsystem=pybuild
+
+# Clean the cmake tree, then remove pybuild leftovers. The leading '-' keeps
+# the clean target going even if a removal fails.
+override_dh_auto_clean:
+	dh_auto_clean --buildsystem=cmake
+	-rm -rf .pybuild
+	-rm -rf python/src/sentencepiece.egg-info
+
+# The test suite is intentionally not run during the package build.
+override_dh_auto_test:
--- /dev/null
+---
+include:
+ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
+ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml
+
+reprotest:
+ allow_failure: true
--- /dev/null
+<?xml version='1.0' encoding='UTF-8'?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
+
+<!--
+
+`xsltproc -''-nonet \
+ -''-param man.charmap.use.subset "0" \
+ -''-param make.year.ranges "1" \
+ -''-param make.single.year.ranges "1" \
+ /usr/share/xml/docbook/stylesheet/nwalsh/manpages/docbook.xsl \
+ manpage.xml'
+
+A manual page <package>.<section> will be generated. You may view the
+manual page with: nroff -man <package>.<section> | less'. A typical entry
+in a Makefile or Makefile.am is:
+
+DB2MAN = /usr/share/sgml/docbook/stylesheet/xsl/nwalsh/manpages/docbook.xsl
+XP = xsltproc -''-nonet -''-param man.charmap.use.subset "0"
+
+manpage.1: manpage.xml
+ $(XP) $(DB2MAN) $<
+
+The xsltproc binary is found in the xsltproc package. The XSL files are in
+docbook-xsl. A description of the parameters you can use can be found in the
+docbook-xsl-doc-* packages. Please remember that if you create the nroff
+version in one of the debian/rules file targets (such as build), you will need
+to include xsltproc and docbook-xsl in your Build-Depends control field.
+Alternatively use the xmlto command/package. That will also automatically
+pull in xsltproc and docbook-xsl.
+
+Notes for using docbook2x: docbook2x-man does not automatically create the
+AUTHOR(S) and COPYRIGHT sections. In this case, please add them manually as
+<refsect1> ... </refsect1>.
+
+To disable the automatic creation of the AUTHOR(S) and COPYRIGHT sections
+read /usr/share/doc/docbook-xsl/doc/manpages/authors.html. This file can be
+found in the docbook-xsl-doc-html package.
+
+Validation can be done using: `xmllint -''-noout -''-valid manpage.xml`
+
+General documentation about man-pages and man-page-formatting:
+man(1), man(7), http://www.tldp.org/HOWTO/Man-Page/
+
+-->
+
+ <!-- Fill in your name for FIRSTNAME and SURNAME. -->
+ <!ENTITY dhfirstname "FIRSTNAME">
+ <!ENTITY dhsurname "SURNAME">
+ <!-- dhusername could also be set to "&firstname; &surname;". -->
+ <!ENTITY dhusername "TSUCHIYA Masatoshi">
+ <!ENTITY dhemail "tsuchiya@namazu.org">
+ <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
+ allowed: see man(7), man(1) and
+ http://www.tldp.org/HOWTO/Man-Page/q2.html. -->
+ <!ENTITY dhsection "SECTION">
+ <!-- TITLE should be something like "User commands" or similar (see
+ http://www.tldp.org/HOWTO/Man-Page/q2.html). -->
+ <!ENTITY dhtitle "sentencepiece User Manual">
+ <!ENTITY dhucpackage "CRFSUITE">
+ <!ENTITY dhpackage "sentencepiece">
+]>
+
+<refentry>
+ <refentryinfo>
+ <title>&dhtitle;</title>
+ <productname>&dhpackage;</productname>
+ <authorgroup>
+ <author>
+ <firstname>&dhfirstname;</firstname>
+ <surname>&dhsurname;</surname>
+ <contrib>Wrote this manpage for the Debian system.</contrib>
+ <address>
+ <email>&dhemail;</email>
+ </address>
+ </author>
+ </authorgroup>
+ <copyright>
+ <year>2007</year>
+ <holder>&dhusername;</holder>
+ </copyright>
+ <legalnotice>
+ <para>This manual page was written for the Debian system
+ (but may be used by others).</para>
+ <para>Permission is granted to copy, distribute and/or modify this
+ document under the terms of the GNU General Public License,
+ Version 2 or (at your option) any later version published by
+ the Free Software Foundation.</para>
+ <para>On Debian systems, the complete text of the GNU General Public
+ License can be found in
+ <filename>/usr/share/common-licenses/GPL</filename>.</para>
+ </legalnotice>
+ </refentryinfo>
+ <refmeta>
+ <refentrytitle>&dhucpackage;</refentrytitle>
+ <manvolnum>&dhsection;</manvolnum>
+ </refmeta>
+ <refnamediv>
+ <refname>&dhpackage;</refname>
+ <refpurpose>program to do something</refpurpose>
+ </refnamediv>
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>&dhpackage;</command>
+ <!-- These are several examples, how syntaxes could look -->
+ <arg choice="plain"><option>-e <replaceable>this</replaceable></option></arg>
+ <arg choice="opt"><option>--example=<parameter>that</parameter></option></arg>
+ <arg choice="opt">
+ <group choice="req">
+ <arg choice="plain"><option>-e</option></arg>
+ <arg choice="plain"><option>--example</option></arg>
+ </group>
+ <replaceable class="option">this</replaceable>
+ </arg>
+ <arg choice="opt">
+ <group choice="req">
+ <arg choice="plain"><option>-e</option></arg>
+ <arg choice="plain"><option>--example</option></arg>
+ </group>
+ <group choice="req">
+ <arg choice="plain"><replaceable>this</replaceable></arg>
+ <arg choice="plain"><replaceable>that</replaceable></arg>
+ </group>
+ </arg>
+ </cmdsynopsis>
+ <cmdsynopsis>
+ <command>&dhpackage;</command>
+ <!-- Normally the help and version options make the programs stop
+ right after outputting the requested information. -->
+ <group choice="opt">
+ <arg choice="plain">
+ <group choice="req">
+ <arg choice="plain"><option>-h</option></arg>
+ <arg choice="plain"><option>--help</option></arg>
+ </group>
+ </arg>
+ <arg choice="plain">
+ <group choice="req">
+ <arg choice="plain"><option>-v</option></arg>
+ <arg choice="plain"><option>--version</option></arg>
+ </group>
+ </arg>
+ </group>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+ <refsect1 id="description">
+ <title>DESCRIPTION</title>
+ <para>This manual page documents briefly the
+ <command>&dhpackage;</command> and <command>bar</command>
+ commands.</para>
+ <para>This manual page was written for the Debian distribution
+ because the original program does not have a manual page.
+ Instead, it has documentation in the GNU <citerefentry>
+ <refentrytitle>info</refentrytitle>
+ <manvolnum>1</manvolnum>
+ </citerefentry> format; see below.</para>
+ <para><command>&dhpackage;</command> is a program that...</para>
+ </refsect1>
+ <refsect1 id="options">
+ <title>OPTIONS</title>
+ <para>The program follows the usual GNU command line syntax,
+ with long options starting with two dashes (`-'). A summary of
+ options is included below. For a complete description, see the
+ <citerefentry>
+ <refentrytitle>info</refentrytitle>
+ <manvolnum>1</manvolnum>
+ </citerefentry> files.</para>
+ <variablelist>
+ <!-- Use the variablelist.term.separator and the
+ variablelist.term.break.after parameters to
+ control the term elements. -->
+ <varlistentry>
+ <term><option>-e <replaceable>this</replaceable></option></term>
+ <term><option>--example=<replaceable>that</replaceable></option></term>
+ <listitem>
+ <para>Does this and that.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>-h</option></term>
+ <term><option>--help</option></term>
+ <listitem>
+ <para>Show summary of options.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>-v</option></term>
+ <term><option>--version</option></term>
+ <listitem>
+ <para>Show version of program.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+ <refsect1 id="files">
+ <title>FILES</title>
+ <variablelist>
+ <varlistentry>
+ <term><filename>/etc/foo.conf</filename></term>
+ <listitem>
+ <para>The system-wide configuration file to control the
+ behaviour of <application>&dhpackage;</application>. See
+ <citerefentry>
+ <refentrytitle>foo.conf</refentrytitle>
+ <manvolnum>5</manvolnum>
+ </citerefentry> for further details.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><filename>${HOME}/.foo.conf</filename></term>
+ <listitem>
+ <para>The per-user configuration file to control the
+ behaviour of <application>&dhpackage;</application>. See
+ <citerefentry>
+ <refentrytitle>foo.conf</refentrytitle>
+ <manvolnum>5</manvolnum>
+ </citerefentry> for further details.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+ <refsect1 id="environment">
+ <title>ENVIRONMENT</title>
+ <variablelist>
+ <varlistentry>
+ <term><envar>FOO_CONF</envar></term>
+ <listitem>
+ <para>If used, the defined file is used as configuration
+ file (see also <xref linkend="files"/>).</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+ <refsect1 id="diagnostics">
+ <title>DIAGNOSTICS</title>
+ <para>The following diagnostics may be issued
+ on <filename class="devicefile">stderr</filename>:</para>
+ <variablelist>
+ <varlistentry>
+ <term><errortext>Bad configuration file. Exiting.</errortext></term>
+ <listitem>
+ <para>The configuration file seems to contain a broken configuration
+ line. Use the <option>--verbose</option> option to get more info.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para><command>&dhpackage;</command> provides some return codes, that can
+ be used in scripts:</para>
+ <segmentedlist>
+ <segtitle>Code</segtitle>
+ <segtitle>Diagnostic</segtitle>
+ <seglistitem>
+ <seg><errorcode>0</errorcode></seg>
+ <seg>Program exited successfully.</seg>
+ </seglistitem>
+ <seglistitem>
+ <seg><errorcode>1</errorcode></seg>
+ <seg>The configuration file seems to be broken.</seg>
+ </seglistitem>
+ </segmentedlist>
+ </refsect1>
+ <refsect1 id="bugs">
+ <!-- Or use this section to tell about upstream BTS. -->
+ <title>BUGS</title>
+ <para>The program is currently limited to only work
+ with the <package>foobar</package> library.</para>
+ <para>The upstream's <acronym>BTS</acronym> can be found
+ at <ulink url="http://bugzilla.foo.tld"/>.</para>
+ </refsect1>
+ <refsect1 id="see_also">
+ <title>SEE ALSO</title>
+ <!-- In alphabetical order. -->
+ <para><citerefentry>
+ <refentrytitle>bar</refentrytitle>
+ <manvolnum>1</manvolnum>
+ </citerefentry>, <citerefentry>
+ <refentrytitle>baz</refentrytitle>
+ <manvolnum>1</manvolnum>
+ </citerefentry>, <citerefentry>
+ <refentrytitle>foo.conf</refentrytitle>
+ <manvolnum>5</manvolnum>
+ </citerefentry></para>
+ <para>The programs are documented fully by <citetitle>The Rise and
+ Fall of a Fooish Bar</citetitle> available via the <citerefentry>
+ <refentrytitle>info</refentrytitle>
+ <manvolnum>1</manvolnum>
+ </citerefentry> system.</para>
+ </refsect1>
+</refentry>
+
--- /dev/null
+3.0 (quilt)
--- /dev/null
+Tests: python spm-encode
+Depends: libsentencepiece0, sentencepiece, python3-sentencepiece
--- /dev/null
+#!/bin/sh
+
+set -e
+
+VERSION=$(dpkg-query --show --showformat='${Version}' sentencepiece | cut -d- -f1)
+cat <<EOS > test_module.py
+import sentencepiece as spm
+
+print('VERSION={}'.format(spm.__version__))
+EOS
+
+PYVERSION=$(python3 test_module.py)
+if [ "VERSION=$VERSION" = "$PYVERSION" ]; then
+ echo "run python module"
+else
+ echo "Failed to get module version: <${PYVERSION}>"
+ exit 1
+fi
+rm -f test_module.py
--- /dev/null
+#!/bin/sh
+
+set -e
+
+cat <<EOS > input.txt
+SentencePiece is an unsupervised text tokenizer and detokenizer mainly
+for Neural Network-based text generation systems where the vocabulary
+size is predetermined prior to the neural model
+training. SentencePiece implements subword units (e.g.,
+byte-pair-encoding (BPE) [Sennrich et al.]) and unigram language model
+[Kudo.]) with the extension of direct training from raw
+sentences. SentencePiece allows us to make a purely end-to-end system
+that does not depend on language-specific pre/postprocessing.
+EOS
+
+rm -f tiny.*
+spm_train --input=input.txt --model_prefix=tiny --vocab_size=100 --character_coverage=1.0 >/dev/null 2>&1
+encoded=$(echo "I saw a girl with a telescope." | spm_encode --model=tiny.model)
+if [ "▁ I ▁s a w ▁a ▁ g ir l ▁w i t h ▁a ▁t el e s c o p e ." = "${encoded}" ]; then
+ echo "run spm_encode test"
+else
+ echo "Failed to spm_encode example: <${encoded}>"
+ exit 1
+fi
+rm -f input.txt
+rm -f tiny.*
--- /dev/null
+version=4
+opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%sentencepiece-$1-Source.tar.xz%" \
+ https://github.com/google/sentencepiece/tags \
+ (?:.*?/)?v(\d[\d.]*)\.tar\.gz debian uupdate