--- /dev/null
--- /dev/null
++# sentencepiece for Debian
++
++The record of sentencepiece package specific information.
++
++## trixie or later
++
++Removed the patches introduced in 0.1.97-3, because they are already applied upstream.
++
++## bookworm
++
++The upstream of sentencepiece 0.1.97 was initially released around June 6, 2022,
++but it was a pre-release version.
++The official release version was shipped on Aug 7, 2022.
++
++The pre-release version was accidentally packaged as 0.1.97-1.
++Thus, some commits were not included in 0.1.97-1.
++
++To fix this issue, the commits since 5e5adf2f851a1514ccc435aae11ee830c438321b
++were applied as the following patch files.
++
++See https://github.com/google/sentencepiece/issues/794 for details.
++
++0001-update-python-wrapper.patch
++0002-remove-debug-symbols-from-wheel-package.patch
++0003-allow-tab-character-to-be-used-in-user_defined_symbo.patch
++0004-add-test-to-use-tab-as-user-defined-symbols.patch
++0005-Uses-C-17-by-default.patch
++0006-Uses-std-atomic-to-define-global-variable.patch
++0007-Fix-a-typo.patch
++0008-Uses-absl-string_view-as-much-as-possible.patch
++0009-Fixed-build-break.patch
++0010-Added-ImmutableSentencePiece-class.patch
++0011-add-verbose-option.patch
++0012-Supports-ImmutableSentencePieceText-from-python-modu.patch
++0013-Adds-more-unittests.patch
++0014-Adds-SWIGPYTHON-flag.patch
++0015-remove-unused-ifdef-SWIG-macro.patch
++0016-Fixed-test-failure.patch
++0017-Uses-property-in-immutable-proto.patch
++0018-automatically-detect-the-number-of-CPUs-in-batch-pro.patch
++0019-support-slice-in-pieces-nbests-objects.patch
++0020-Updated-the-document.patch
++0021-Fixed-errors-in-example-notebook.patch
++0022-Fix-dead-links.patch
++0023-added-ShutdownLibrary-function-to-uninitialize-globa.patch
++0024-Fixed-the-issue-of-concatinating-paths-for-pkg-confi.patch
++
++ -- kenhys <kenhys@fabre.debian.net>, Sat, 17 Jun 2023 23:16:19 +0900
--- /dev/null
--- /dev/null
++sentencepiece (0.1.99-4) unstable; urgency=medium
++
++ * debian/clean
++ - Fix FTBFS (double build) (Closes: #1047552)
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 19 Aug 2023 17:14:56 +0900
++
++sentencepiece (0.1.99-3) unstable; urgency=medium
++
++ * debian/tests/control
++ - Fix regression (preventing migration) about
++ python module's autopkgtest.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 21 Jun 2023 14:44:27 +0900
++
++sentencepiece (0.1.99-2) unstable; urgency=medium
++
++ * debian/patches/fix-ftbfs-big-endian.patch
++ - Add patch to fix FTBFS on big endian platform.
++ * debian/tests/control
++ - Fix W: illegal-runtime-test-name warning
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Tue, 20 Jun 2023 19:28:50 +0900
++
++sentencepiece (0.1.99-1) unstable; urgency=medium
++
++ * New upstream version 0.1.99
++ * debian/control
++ - Bump Standards-Version to 4.6.2. No other changes are required.
++ * debian/patches/disable-static-library.patch
++ debian/patches/support-python-module-in-place.patch
++ - Refresh patch files for 0.1.99
++ * debian/patches/*.patch
++ - Drop deprecated patch files which was already applied in upstream.
++ * debian/README.Debian
++ - Update explanation of debian/patches.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Sun, 18 Jun 2023 00:04:54 +0900
++
++sentencepiece (0.1.97-3) unstable; urgency=medium
++
++ * debian/patches/0001-update-python-wrapper.patch
++ debian/patches/0002-remove-debug-symbols-from-wheel-package.patch
++ debian/patches/0003-allow-tab-character-to-be-used-in-user_defined_symbo.patch
++ debian/patches/0004-add-test-to-use-tab-as-user-defined-symbols.patch
++ debian/patches/0005-Uses-C-17-by-default.patch
++ debian/patches/0006-Uses-std-atomic-to-define-global-variable.patch
++ debian/patches/0007-Fix-a-typo.patch
++ debian/patches/0008-Uses-absl-string_view-as-much-as-possible.patch
++ debian/patches/0009-Fixed-build-break.patch
++ debian/patches/0010-Added-ImmutableSentencePiece-class.patch
++ debian/patches/0011-add-verbose-option.patch
++ debian/patches/0012-Supports-ImmutableSentencePieceText-from-python-modu.patch
++ debian/patches/0013-Adds-more-unittests.patch
++ debian/patches/0014-Adds-SWIGPYTHON-flag.patch
++ debian/patches/0015-remove-unused-ifdef-SWIG-macro.patch
++ debian/patches/0016-Fixed-test-failure.patch
++ debian/patches/0017-Uses-property-in-immutable-proto.patch
++ debian/patches/0018-automatically-detect-the-number-of-CPUs-in-batch-pro.patch
++ debian/patches/0019-support-slice-in-pieces-nbests-objects.patch
++ debian/patches/0020-Updated-the-document.patch
++ debian/patches/0021-Fixed-errors-in-example-notebook.patch
++ debian/patches/0022-Fix-dead-links.patch
++ debian/patches/0023-added-ShutdownLibrary-function-to-uninitialize-globa.patch
++ debian/patches/0024-Fixed-the-issue-of-concatinating-paths-for-pkg-confi.patch
++ - Add missing patch files for 0.1.97.
++ * debian/README.Debian
++ - Add explanation of debian/patches.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Mon, 21 Nov 2022 22:43:46 +0900
++
++sentencepiece (0.1.97-2) unstable; urgency=medium
++
++ * Team upload
++
++ [ Steve Langasek ]
++ * debian/patches/header-dependencies.patch: include necessary headers
++ to ensure IS_BIG_ENDIAN is defined, see #1017360.
++
++ -- Graham Inggs <ginggs@debian.org> Sun, 18 Sep 2022 05:30:57 +0000
++
++sentencepiece (0.1.97-1) unstable; urgency=medium
++
++ * New upstream version 0.1.97
++ * debian/copyright
++ - Update maintainer E-mail address
++ * debian/control
++ - Bump Standards-Version to 4.6.1. No other changes are required.
++ * debian/patches/support-python-module-in-place.patch
++ - Refresh path to build python module.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Tue, 14 Jun 2022 20:19:58 +0900
++
++sentencepiece (0.1.96-1) unstable; urgency=medium
++
++ * New upstream version 0.1.96
++ * debian/control
++ - Bump standard-version to 4.5.1. No changes are required.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 18 Aug 2021 20:52:46 +0900
++
++sentencepiece (0.1.95-1) unstable; urgency=medium
++
++ * New upstream version 0.1.95
++ * debian/patches/support-python-module-in-place.patch
++ - Fix undefined symbol when importing python module (Closes: #979040)
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Thu, 11 Feb 2021 17:36:23 +0900
++
++sentencepiece (0.1.94-2) unstable; urgency=medium
++
++ * Fix FTBFS on armel/mipsel (Closes: #977235)
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 16 Dec 2020 21:18:15 +0900
++
++sentencepiece (0.1.94-1) unstable; urgency=medium
++
++ * New upstream version 0.1.94
++ * debian/patches/support-python-module-in-place.patch
++ - Refresh path to build python module.
++ * debian/patches/fix-ftbfs-ports.patch
++ debian/patches/mutiarch-support.patch
++ - Remove needless patch because these patch was merged
++ to google/sentencepiece.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 28 Oct 2020 21:02:07 +0900
++
++sentencepiece (0.1.93-1) unstable; urgency=medium
++
++ * New upstream version 0.1.93
++ * debian/source/lintian-overrides
++ - Remove needless override.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Thu, 15 Oct 2020 21:32:05 +0900
++
++sentencepiece (0.1.92-3) unstable; urgency=medium
++
++ * debian/patches/fix-ftbfs-ports.patch
++ - Fix FTBFS on powerpc
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 03 Oct 2020 20:48:27 +0900
++
++sentencepiece (0.1.92-2) unstable; urgency=medium
++
++ * debian/patches/0002-Change-in-order-to-build-Python-modules-in-place.patch
++ - Fix FTBFS on hurd-i386
++ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch
++ - Fix missing dependency to atomic library (powerpc,m68k,sh4)
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 26 Sep 2020 20:27:17 +0900
++
++sentencepiece (0.1.92-1) unstable; urgency=medium
++
++ * New upstream version 0.1.92
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Fri, 19 Jun 2020 19:38:49 +0900
++
++sentencepiece (0.1.91-1) unstable; urgency=medium
++
++ * New upstream version 0.1.91
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Fri, 22 May 2020 15:17:42 +0900
++
++sentencepiece (0.1.90-3) unstable; urgency=medium
++
++ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch
++ - Refresh patch to fix FTBFS.
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Sun, 17 May 2020 09:02:23 +0900
++
++sentencepiece (0.1.90-2) unstable; urgency=medium
++
++ * debian/patches/0004-Fix-FTBFS-on-armel-and-mipsel.patch
++ - Add patch to fix FTBFS on mipsel and armel
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Sat, 16 May 2020 16:16:45 +0900
++
++sentencepiece (0.1.90-1) unstable; urgency=medium
++
++ * New upstream version 0.1.90
++ * debian/control
++ - Update Uploaders:
++ - Bump standard-version to 4.5.0
++ - Bump compat version to 13.
++ * debian/source/lintian-overrides
++ - Fix false positive source-is-missing
++ * debian/patches/0003-Disable-static-library-explicitly.patch
++ - Disable to build static library
++
++ -- Kentaro Hayashi <kenhys@xdump.org> Wed, 13 May 2020 19:09:34 +0900
++
++sentencepiece (0.1.84-1) unstable; urgency=medium
++
++ * New upstream version 0.1.84 (Closes: #939860)
++
++ [ TSUCHIYA Masatoshi ]
++ * Initial packaging tasks.
++ * Remove pipeline configurations for BitBucket.
++
++ [ Kentaro Hayashi ]
++ * debian/gbp.conf
++ - Add basic configuration about debian-branch
++ * debian/watch
++ - Add missing watch file to detect a new release
++ * debian/control
++ - Update deprecated Priority: to optional
++ - Add Vcs-* fields
++ - Fix W: sentencepiece: description-synopsis-starts-with-article
++ - Bump standard version to 4.4.1
++ - Update Vcs-* under science-team
++ - Bump up compatibility level
++ - Drop python2 support
++ * debian/copyright
++ - Use https://
++ - Update copyright about third party modules
++ * debian/rules
++ - Enable hardening
++ * debian/salsa-ci.yml
++ - Add Salsa CI configuration
++
++ -- Kentaro Hayashi <hayashi@clear-code.com> Thu, 17 Oct 2019 13:33:34 +0900
--- /dev/null
--- /dev/null
++python/build/
++python/src/sentencepiece.egg-info/
--- /dev/null
--- /dev/null
++Source: sentencepiece
++Section: science
++Priority: optional
++Maintainer: Debian Science Maintainers <debian-science-maintainers@lists.alioth.debian.org>
++Uploaders:
++ TSUCHIYA Masatoshi <tsuchiya@namazu.org>,
++ Kentaro Hayashi <kenhys@xdump.org>
++Build-Depends:
++ debhelper-compat (= 13),
++ protobuf-compiler,
++ libprotobuf-dev,
++ dh-python,
++ python3-all-dev,
++ quilt,
++ cmake,
++ python3-setuptools
++Standards-Version: 4.6.2
++Homepage: https://github.com/google/sentencepiece
++Vcs-Browser: https://salsa.debian.org/science-team/sentencepiece
++Vcs-Git: https://salsa.debian.org/science-team/sentencepiece.git
++Rules-Requires-Root: no
++
++Package: sentencepiece
++Architecture: any
++Depends: ${shlibs:Depends}, ${misc:Depends}
++Description: Unsupervised text tokenizer and detokenizer
++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
++ designed for Neural Network-based text generation systems where the
++ vocabulary size is predetermined prior to the neural model training.
++
++Package: libsentencepiece0
++Section: libs
++Architecture: any
++Depends: ${shlibs:Depends}, ${misc:Depends}
++Description: Library files of SentencePiece
++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
++ designed for Neural Network-based text generation systems where the
++ vocabulary size is predetermined prior to the neural model training.
++
++Package: libsentencepiece-dev
++Section: libdevel
++Architecture: any
++Depends: libsentencepiece0 (= ${binary:Version}), ${misc:Depends}
++Description: Header files of SentencePiece
++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
++ designed for Neural Network-based text generation systems where the
++ vocabulary size is predetermined prior to the neural model training.
++
++Package: python3-sentencepiece
++Section: python
++Architecture: any
++Depends:
++ ${shlibs:Depends},
++ ${misc:Depends},
++ ${python3:Depends}
++Description: SentencePiece binding for Python3
++ SentencePiece is an unsupervised text tokenizer/detokenizer mainly
++ designed for Neural Network-based text generation systems where the
++ vocabulary size is predetermined prior to the neural model training.
++ .
++ python3-sentencepiece is its binding for Python3.
--- /dev/null
--- /dev/null
++Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
++Upstream-Name: sentencepiece
++Source: https://github.com/google/sentencepiece
++
++Files: *
++Copyright: 2017 Taku Kudo <taku@chasen.org>
++License: Apache-2.0
++ Licensed under the Apache License, Version 2.0 (the "License");
++ you may not use this file except in compliance with the License.
++ You may obtain a copy of the License at
++ .
++ http://www.apache.org/licenses/LICENSE-2.0
++ .
++ Unless required by applicable law or agreed to in writing, software
++ distributed under the License is distributed on an "AS IS" BASIS,
++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
++ implied. See the License for the specific language governing
++ permissions and limitations under the License.
++
++Files: debian/*
++Copyright:
++ 2016 TSUCHIYA Masatoshi <tsuchiya@namazu.org>
++ 2019-2022 Kentaro Hayashi <kenhys@xdump.org>
++License: GPL-2+
++ This package is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2 of the License, or
++ (at your option) any later version.
++ .
++ This package is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++ .
++ You should have received a copy of the GNU General Public License
++ along with this program. If not, see <http://www.gnu.org/licenses/>
++ .
++ On Debian systems, the complete text of the GNU General
++ Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
++
++Files: third_party/esaxx/*
++Copyright: 2010 Daisuke Okanohara
++License: MIT
++
++Files: third_party/darts_clone/*
++Copyright: 2008-2011, Susumu Yata
++License: BSD-3-clause
++
++Files: third_party/protobuf-lite/*
++Copyright: 2008 Google Inc.
++License: BSD-3-clause
++
++Files: data/Scripts.txt
++Copyright: 1991-2016 Unicode, Inc.
++License: Unicode
++ COPYRIGHT AND PERMISSION NOTICE
++ .
++ Copyright © 1991-2016 Unicode, Inc. All rights reserved.
++ Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
++ .
++ Permission is hereby granted, free of charge, to any person obtaining
++ a copy of the Unicode data files and any associated documentation
++ (the "Data Files") or Unicode software and any associated documentation
++ (the "Software") to deal in the Data Files or Software
++ without restriction, including without limitation the rights to use,
++ copy, modify, merge, publish, distribute, and/or sell copies of
++ the Data Files or Software, and to permit persons to whom the Data Files
++ or Software are furnished to do so, provided that either
++ (a) this copyright and permission notice appear with all copies
++ of the Data Files or Software, or
++ (b) this copyright and permission notice appear in associated
++ Documentation.
++ .
++ THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
++ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
++ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ NONINFRINGEMENT OF THIRD PARTY RIGHTS.
++ IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
++ NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
++ DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
++ DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
++ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
++ PERFORMANCE OF THE DATA FILES OR SOFTWARE.
++ .
++ Except as contained in this notice, the name of a copyright holder
++ shall not be used in advertising or otherwise to promote the sale,
++ use or other dealings in these Data Files or Software without prior
++ written authorization of the copyright holder.
++
++Files: data/botchan.txt
++Copyright: Kin-nosuke Natsume
++License: public-domain
++ Written by Kin-nosuke Natsume and put into the public domain.
++ It was translated by Yasotaro Morri and published by Project Gutenberg.
++
++Files: data/wagahaiwa_nekodearu.txt
++Copyright: Kin-nosuke Natsume
++License: public-domain
++ Written by Kin-nosuke Natsume and put into the public domain.
++ It was digitized by Aozora Bunko collaborators and published by Aozora Bunko.
++
++License: MIT
++ Permission is hereby granted, free of charge, to any person
++ obtaining a copy of this software and associated documentation
++ files (the "Software"), to deal in the Software without
++ restriction, including without limitation the rights to use,
++ copy, modify, merge, publish, distribute, sublicense, and/or sell
++ copies of the Software, and to permit persons to whom the
++ Software is furnished to do so, subject to the following
++ conditions:
++ .
++ The above copyright notice and this permission notice shall be
++ included in all copies or substantial portions of the Software.
++ .
++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ OTHER DEALINGS IN THE SOFTWARE.
++
++License: BSD-3-clause
++ Redistribution and use in source and binary forms, with or without
++ modification, are permitted provided that the following conditions
++ are met:
++ .
++ - Redistributions of source code must retain the above copyright
++ notice, this list of conditions and the following disclaimer.
++ - Redistributions in binary form must reproduce the above copyright
++ notice, this list of conditions and the following disclaimer in the
++ documentation and/or other materials provided with the
++ distribution.
++ - Neither the name of the <ORGANIZATION> nor the names of its
++ contributors may be used to endorse or promote products derived
++ from this software without specific prior written permission.
++ .
++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
--- /dev/null
--- /dev/null
++[DEFAULT]
++debian-branch = master
++
--- /dev/null
--- /dev/null
++usr/lib/*/lib*.so
++usr/lib/*/pkgconfig/*
++usr/include/*
--- /dev/null
--- /dev/null
++usr/lib/*/lib*.so.*
--- /dev/null
--- /dev/null
++From: Kentaro Hayashi <kenhys@gmail.com>
++Date: Sat, 17 Jun 2023 22:47:25 +0900
++Subject: Disable static library explicitly
++
++Forwarded: not-needed
++Bug-Debian: N/A
++---
++ src/CMakeLists.txt | 11 +----------
++ 1 file changed, 1 insertion(+), 10 deletions(-)
++
++diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
++index 1c7726e..077d37d 100644
++--- a/src/CMakeLists.txt
+++++ b/src/CMakeLists.txt
++@@ -222,16 +222,10 @@ if (SPM_ENABLE_SHARED)
++ add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
++ endif()
++
++-add_library(sentencepiece-static STATIC ${SPM_SRCS})
++-add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS})
++-
++-target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS})
++-target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS})
++-
++ if (SPM_ENABLE_SHARED)
++ target_link_libraries(sentencepiece ${SPM_LIBS})
++ target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)
++- set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static)
+++ set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train)
++ set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0)
++ set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
++ set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
++@@ -248,9 +242,6 @@ else()
++ set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
++ endif()
++
++-set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece")
++-set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train")
++-
++ if (NOT MSVC)
++ if (SPM_COVERAGE)
++ set(CMAKE_CXX_FLAGS "-O0 -Wall -fPIC -coverage ${CMAKE_CXX_FLAGS}")
--- /dev/null
--- /dev/null
++From: Kentaro Hayashi <kenhys@gmail.com>
++Date: Tue, 20 Jun 2023 17:12:58 +0900
++Subject: Fixes build test errors in big-endian machines
++
++Author: Taku Kudo <taku@google.com>
++Origin: https://github.com/google/sentencepiece/commit/827591a0c552f2187aac8b8e0f999e8ff31aad81.patch
++Forwarded: not-needed
++---
++ CMakeLists.txt | 5 +++++
++ src/CMakeLists.txt | 2 ++
++ src/common.h | 10 ++++++----
++ src/normalizer.cc | 15 ++++++++-------
++ src/unigram_model_trainer_test.cc | 5 ++---
++ 5 files changed, 23 insertions(+), 14 deletions(-)
++
++diff --git a/CMakeLists.txt b/CMakeLists.txt
++index 1b3af04..a2f0f77 100644
++--- a/CMakeLists.txt
+++++ b/CMakeLists.txt
++@@ -33,6 +33,11 @@ option(SPM_NO_THREADLOCAL "Disable thread_local operator" OFF)
++ option(SPM_USE_BUILTIN_PROTOBUF "Use built-in protobuf" ON)
++ option(SPM_USE_EXTERNAL_ABSL "Use external abseil" OFF)
++ option(SPM_ENABLE_MSVC_MT_BUILD, "Use /MT flag in MSVC build" OFF)
+++option(SPM_CROSS_SYSTEM_PROCESSOR, "Override system processor" "")
+++
+++if (SPM_CROSS_SYSTEM_PROCESSOR)
+++ set(CMAKE_SYSTEM_PROCESSOR ${SPM_CROSS_SYSTEM_PROCESSOR})
+++endif()
++
++ # Disable shared build on windows
++ if(WIN32)
++diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
++index 077d37d..09ef57f 100644
++--- a/src/CMakeLists.txt
+++++ b/src/CMakeLists.txt
++@@ -208,6 +208,7 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR
++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "mips") OR
++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "m68k") OR
++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc") OR
+++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc") OR
++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch") OR
++ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "sh4"))
++ find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1)
++@@ -217,6 +218,7 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR
++ endif()
++ endif()
++
+++
++ if (SPM_ENABLE_SHARED)
++ add_library(sentencepiece SHARED ${SPM_SRCS})
++ add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
++diff --git a/src/common.h b/src/common.h
++index ef5546d..b38b3f7 100644
++--- a/src/common.h
+++++ b/src/common.h
++@@ -79,10 +79,6 @@ char (&ArraySizeHelper(const T (&array)[N]))[N];
++ #endif
++ #endif
++
++-#ifdef IS_BIG_ENDIAN
++-inline uint32 Swap32(uint32 x) { return __builtin_bswap32(x); }
++-#endif
++-
++ namespace sentencepiece {
++ #ifdef OS_WIN
++ namespace win32 {
++@@ -90,6 +86,12 @@ std::wstring Utf8ToWide(const absl::string_view input);
++ } // namespace win32
++ #endif
++
+++#ifdef IS_BIG_ENDIAN
+++namespace util {
+++inline uint32 Swap32(uint32 x) { return __builtin_bswap32(x); }
+++} // namespace util
+++#endif
+++
++ namespace error {
++
++ void Abort();
++diff --git a/src/normalizer.cc b/src/normalizer.cc
++index 2ab8084..53e43c4 100644
++--- a/src/normalizer.cc
+++++ b/src/normalizer.cc
++@@ -260,14 +260,14 @@ std::string Normalizer::EncodePrecompiledCharsMap(
++ std::string blob;
++ blob.append(string_util::EncodePOD<uint32>(trie_blob.size()));
++ blob.append(trie_blob.data(), trie_blob.size());
++- blob.append(normalized.data(), normalized.size());
++
++ #ifdef IS_BIG_ENDIAN
++ uint32 *data = reinterpret_cast<uint32 *>(const_cast<char *>(blob.data()));
++- for (int i = 0; i <= trie_blob.size() / 4; ++i)
++- data[i] = util::Swap32(data[i]);
+++ for (int i = 0; i < blob.size() / 4; ++i) data[i] = util::Swap32(data[i]);
++ #endif
++
+++ blob.append(normalized.data(), normalized.size());
+++
++ return blob;
++ }
++
++@@ -279,8 +279,7 @@ util::Status Normalizer::DecodePrecompiledCharsMap(
++ if (blob.size() <= sizeof(trie_blob_size) ||
++ !string_util::DecodePOD<uint32>(
++ absl::string_view(blob.data(), sizeof(trie_blob_size)),
++- &trie_blob_size) ||
++- trie_blob_size >= blob.size()) {
+++ &trie_blob_size)) {
++ return util::InternalError("Blob for normalization rule is broken.");
++ }
++
++@@ -288,15 +287,17 @@ util::Status Normalizer::DecodePrecompiledCharsMap(
++ trie_blob_size = util::Swap32(trie_blob_size);
++ #endif
++
++- if (trie_blob_size >= blob.size())
+++ if (trie_blob_size >= blob.size()) {
++ return util::InternalError("Trie data size exceeds the input blob size.");
+++ }
++
++ blob.remove_prefix(sizeof(trie_blob_size));
++
++ #ifdef IS_BIG_ENDIAN
+++ CHECK_OR_RETURN(buffer);
++ buffer->assign(blob.data(), trie_blob_size);
++ uint32 *data = reinterpret_cast<uint32 *>(const_cast<char *>(buffer->data()));
++- for (int i = 0; i < trie_blob_size / 4; ++i) data[i] = util::Swap32(data[i]);
+++ for (int i = 0; i < buffer->size() / 4; ++i) data[i] = util::Swap32(data[i]);
++ *trie_blob = absl::string_view(buffer->data(), trie_blob_size);
++ #else
++ *trie_blob = absl::string_view(blob.data(), trie_blob_size);
++diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc
++index 9d2c526..31da90b 100644
++--- a/src/unigram_model_trainer_test.cc
+++++ b/src/unigram_model_trainer_test.cc
++@@ -106,6 +106,7 @@ TrainerResult RunTrainer(const std::vector<std::string>& input, int size,
++
++ TrainerResult res;
++ res.seed_pieces_and_probs = seed_pieces;
+++ std::sort(pieces.begin(), pieces.end());
++ res.sentence_pieces = absl::StrJoin(pieces, " ");
++ return res;
++ }
++@@ -119,10 +120,8 @@ TEST(UnigramTrainerTest, BasicTest) {
++ // Check seed pieces.
++ EXPECT_EQ(27, res.seed_pieces_and_probs.size());
++
++- LOG(INFO) << "[" << res.sentence_pieces << "]";
++-
++ // Check final pieces.
++- EXPECT_EQ("i a n y m l e apple ve O P r g t an v ▁ b A le ▁an p d h",
+++ EXPECT_EQ("A O P a an apple b d e g h i l le m n p r t v ve y ▁ ▁an",
++ res.sentence_pieces);
++ }
++
--- /dev/null
--- /dev/null
++From: Kentaro Hayashi <kenhys@gmail.com>
++Date: Mon, 21 Nov 2022 22:17:18 +0900
++Subject: Include necessary headers to ensure IS_BIG_ENDIAN is defined
++
++normalizer.h uses IS_BIG_ENDIAN, which is defined in util.h.
++Include util.h here.
++
++Author: Steve Langasek <steve.langasek@ubuntu.com>
++Last-Update: 2022-08-27
++Forwarded: no
++Bug-Debian: https://bugs.debian.org/1017360
++---
++ src/normalizer.h | 1 +
++ 1 file changed, 1 insertion(+)
++
++diff --git a/src/normalizer.h b/src/normalizer.h
++index c79813c..37fdb8a 100644
++--- a/src/normalizer.h
+++++ b/src/normalizer.h
++@@ -22,6 +22,7 @@
++ #include <vector>
++
++ #include "common.h"
+++#include "util.h"
++ #include "sentencepiece_model.pb.h"
++ #include "sentencepiece_processor.h"
++ #include "third_party/absl/strings/string_view.h"
--- /dev/null
--- /dev/null
++disable-static-library.patch
++support-python-module-in-place.patch
++header-dependencies.patch
++fix-ftbfs-big-endian.patch
--- /dev/null
--- /dev/null
++From: Kentaro Hayashi <kenhys@gmail.com>
++Date: Sat, 17 Jun 2023 22:39:14 +0900
++Subject: Support to build Python module without pkg-config
++
++---
++ python/setup.py | 34 ++++++++++++++++++++--------------
++ 1 file changed, 20 insertions(+), 14 deletions(-)
++
++diff --git a/python/setup.py b/python/setup.py
++index 5411231..631a8c4 100755
++--- a/python/setup.py
+++++ b/python/setup.py
++@@ -77,23 +77,29 @@ class build_ext(_build_ext):
++ """Override build_extension to run cmake."""
++
++ def build_extension(self, ext):
++- cflags, libs = get_cflags_and_libs('../build/root')
++-
++- if len(libs) == 0:
++- if is_sentencepiece_installed():
++- cflags = cflags + run_pkg_config('cflags')
++- libs = run_pkg_config('libs')
++- else:
++- subprocess.check_call(['./build_bundled.sh', __version__])
++- cflags, libs = get_cflags_and_libs('./build/root')
+++ # cflags, libs = get_cflags_and_libs('../build/root')
+++ # if len(libs) == 0:
+++ # cflags, libs = get_cflags_and_libs('./bundled/root')
+++
+++ # if len(libs) == 0:
+++ # if is_sentencepiece_installed():
+++ # cflags = cflags + run_pkg_config('cflags')
+++ # libs = run_pkg_config('libs')
+++ # else:
+++ # subprocess.check_call(['./build_bundled.sh', __version__])
+++ # cflags, libs = get_cflags_and_libs('./bundled/root')
++
++ # Fix compile on some versions of Mac OSX
++ # See: https://github.com/neulab/xnmt/issues/199
++- if sys.platform == 'darwin':
++- cflags.append('-mmacosx-version-min=10.9')
++- else:
++- cflags.append('-Wl,-strip-all')
++- libs.append('-Wl,-strip-all')
+++ # if sys.platform == 'darwin':
+++ # cflags.append('-mmacosx-version-min=10.9')
+++ # else:
+++ # cflags.append('-Wl,-strip-all')
+++ # libs.append('-Wl,-strip-all')
+++ cflags = ['-I../src']
+++ cmd = "dpkg-architecture -q DEB_BUILD_GNU_TYPE"
+++ arch = subprocess.check_output(cmd, shell=True).decode("utf-8").strip().split()[0]
+++ libs = ["-L../obj-%s/src" % arch, "-lsentencepiece", "-lsentencepiece_train"]
++ print('## cflags={}'.format(' '.join(cflags)))
++ print('## libs={}'.format(' '.join(libs)))
++ ext.extra_compile_args = cflags
--- /dev/null
--- /dev/null
++usr/lib/python3.*/
--- /dev/null
--- /dev/null
++#!/usr/bin/make -f
++# -*- makefile -*-
++# debian/rules for sentencepiece.
++#
++# Each dh_auto_* step below is overridden so that it runs once with the
++# cmake build system for the top-level tree and once with pybuild for the
++# Python binding under python/.
++#
++# Sample debian/rules that uses debhelper.
++# This file was originally written by Joey Hess and Craig Small.
++# As a special exception, when this file is copied by dh-make into a
++# dh-make output file, you may use that output file without restriction.
++# This special exception was added by Craig Small in version 0.37 of dh-make.
++
++# Uncomment this to turn on verbose mode.
++#export DH_VERBOSE=1
++
++# Request all hardening options and export the resulting build flags.
++export DEB_BUILD_MAINT_OPTIONS = hardening=+all
++DPKG_EXPORT_BUILDFLAGS = 1
++include /usr/share/dpkg/buildflags.mk
++
++# On the listed architectures, always link against libatomic;
++# --no-as-needed keeps the linker from dropping it, and --as-needed
++# restores the default afterwards.
++# NOTE(review): DEB_HOST_ARCH is not defined in this file; presumably it is
++# provided by the build environment - confirm.
++ifneq (,$(filter $(DEB_HOST_ARCH), armel mipsel m68k powerpc sh4))
++ export DEB_LDFLAGS_MAINT_APPEND += -Wl,--no-as-needed -latomic -Wl,--as-needed
++endif
++
++# Default rule: delegate every target to dh with the python3 addon.
++%:
++ dh $@ --with python3 --buildsystem=cmake
++
++# Configure the cmake tree first, then the Python module.
++override_dh_auto_configure:
++ dh_auto_configure --buildsystem=cmake
++ dh_auto_configure --sourcedirectory=python --buildsystem=pybuild
++
++# Build the cmake tree first, then the Python module.
++override_dh_auto_build:
++ dh_auto_build --buildsystem=cmake
++ dh_auto_build --sourcedirectory=python --buildsystem=pybuild
++
++# Install the cmake tree first, then the Python module.
++# NOTE(review): basedir is set as a target-specific variable but is not
++# referenced by the recipe below - confirm whether it is still needed.
++override_dh_auto_install: basedir=$(shell pwd)/debian
++override_dh_auto_install:
++ dh_auto_install --buildsystem=cmake
++ dh_auto_install --sourcedirectory=python --buildsystem=pybuild
++
++# Clean the cmake build, then remove the Python build leftovers.
++# The rm commands are prefixed with '-' so a failure does not abort the
++# clean. The egg-info path matches the one listed in debian/clean
++# (python/src/sentencepiece.egg-info); the previous ".python/..." path
++# did not exist, so nothing was removed.
++override_dh_auto_clean:
++ dh_auto_clean --buildsystem=cmake
++ -rm -rf .pybuild
++ -rm -rf python/src/sentencepiece.egg-info
++
++# Run no tests: the empty override disables dh_auto_test.
++override_dh_auto_test:
--- /dev/null
--- /dev/null
++---
++include:
++ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
++ - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml
++
++reprotest:
++ allow_failure: true
--- /dev/null
--- /dev/null
++doc/*.md
--- /dev/null
--- /dev/null
++usr/bin/*
--- /dev/null
--- /dev/null
++<?xml version='1.0' encoding='UTF-8'?>
++<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
++"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
++
++<!--
++
++`xsltproc -''-nonet \
++ -''-param man.charmap.use.subset "0" \
++ -''-param make.year.ranges "1" \
++ -''-param make.single.year.ranges "1" \
++ /usr/share/xml/docbook/stylesheet/nwalsh/manpages/docbook.xsl \
++ manpage.xml'
++
++A manual page <package>.<section> will be generated. You may view the
++manual page with: `nroff -man <package>.<section> | less'. A typical entry
++in a Makefile or Makefile.am is:
++
++DB2MAN = /usr/share/sgml/docbook/stylesheet/xsl/nwalsh/manpages/docbook.xsl
++XP = xsltproc -''-nonet -''-param man.charmap.use.subset "0"
++
++manpage.1: manpage.xml
++ $(XP) $(DB2MAN) $<
++
++The xsltproc binary is found in the xsltproc package. The XSL files are in
++docbook-xsl. A description of the parameters you can use can be found in the
++docbook-xsl-doc-* packages. Please remember that if you create the nroff
++version in one of the debian/rules file targets (such as build), you will need
++to include xsltproc and docbook-xsl in your Build-Depends control field.
++Alternatively use the xmlto command/package. That will also automatically
++pull in xsltproc and docbook-xsl.
++
++Notes for using docbook2x: docbook2x-man does not automatically create the
++AUTHOR(S) and COPYRIGHT sections. In this case, please add them manually as
++<refsect1> ... </refsect1>.
++
++To disable the automatic creation of the AUTHOR(S) and COPYRIGHT sections
++read /usr/share/doc/docbook-xsl/doc/manpages/authors.html. This file can be
++found in the docbook-xsl-doc-html package.
++
++Validation can be done using: `xmllint -''-noout -''-valid manpage.xml`
++
++General documentation about man-pages and man-page-formatting:
++man(1), man(7), http://www.tldp.org/HOWTO/Man-Page/
++
++-->
++
++ <!-- Fill in your name for FIRSTNAME and SURNAME. -->
++ <!ENTITY dhfirstname "FIRSTNAME">
++ <!ENTITY dhsurname "SURNAME">
++ <!-- dhusername could also be set to "&firstname; &surname;". -->
++ <!ENTITY dhusername "TSUCHIYA Masatoshi">
++ <!ENTITY dhemail "tsuchiya@namazu.org">
++ <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
++ allowed: see man(7), man(1) and
++ http://www.tldp.org/HOWTO/Man-Page/q2.html. -->
++ <!ENTITY dhsection "SECTION">
++ <!-- TITLE should be something like "User commands" or similar (see
++ http://www.tldp.org/HOWTO/Man-Page/q2.html). -->
++ <!ENTITY dhtitle "sentencepiece User Manual">
++ <!ENTITY dhucpackage "SENTENCEPIECE">
++ <!ENTITY dhpackage "sentencepiece">
++]>
++
++<refentry>
++ <refentryinfo>
++ <title>&dhtitle;</title>
++ <productname>&dhpackage;</productname>
++ <authorgroup>
++ <author>
++ <firstname>&dhfirstname;</firstname>
++ <surname>&dhsurname;</surname>
++ <contrib>Wrote this manpage for the Debian system.</contrib>
++ <address>
++ <email>&dhemail;</email>
++ </address>
++ </author>
++ </authorgroup>
++ <copyright>
++ <year>2007</year>
++ <holder>&dhusername;</holder>
++ </copyright>
++ <legalnotice>
++ <para>This manual page was written for the Debian system
++ (but may be used by others).</para>
++ <para>Permission is granted to copy, distribute and/or modify this
++ document under the terms of the GNU General Public License,
++ Version 2 or (at your option) any later version published by
++ the Free Software Foundation.</para>
++ <para>On Debian systems, the complete text of the GNU General Public
++ License can be found in
++ <filename>/usr/share/common-licenses/GPL</filename>.</para>
++ </legalnotice>
++ </refentryinfo>
++ <refmeta>
++ <refentrytitle>&dhucpackage;</refentrytitle>
++ <manvolnum>&dhsection;</manvolnum>
++ </refmeta>
++ <refnamediv>
++ <refname>&dhpackage;</refname>
++ <refpurpose>program to do something</refpurpose>
++ </refnamediv>
++ <refsynopsisdiv>
++ <cmdsynopsis>
++ <command>&dhpackage;</command>
++ <!-- These are several examples, how syntaxes could look -->
++ <arg choice="plain"><option>-e <replaceable>this</replaceable></option></arg>
++ <arg choice="opt"><option>--example=<parameter>that</parameter></option></arg>
++ <arg choice="opt">
++ <group choice="req">
++ <arg choice="plain"><option>-e</option></arg>
++ <arg choice="plain"><option>--example</option></arg>
++ </group>
++ <replaceable class="option">this</replaceable>
++ </arg>
++ <arg choice="opt">
++ <group choice="req">
++ <arg choice="plain"><option>-e</option></arg>
++ <arg choice="plain"><option>--example</option></arg>
++ </group>
++ <group choice="req">
++ <arg choice="plain"><replaceable>this</replaceable></arg>
++ <arg choice="plain"><replaceable>that</replaceable></arg>
++ </group>
++ </arg>
++ </cmdsynopsis>
++ <cmdsynopsis>
++ <command>&dhpackage;</command>
++ <!-- Normally the help and version options make the programs stop
++ right after outputting the requested information. -->
++ <group choice="opt">
++ <arg choice="plain">
++ <group choice="req">
++ <arg choice="plain"><option>-h</option></arg>
++ <arg choice="plain"><option>--help</option></arg>
++ </group>
++ </arg>
++ <arg choice="plain">
++ <group choice="req">
++ <arg choice="plain"><option>-v</option></arg>
++ <arg choice="plain"><option>--version</option></arg>
++ </group>
++ </arg>
++ </group>
++ </cmdsynopsis>
++ </refsynopsisdiv>
++ <refsect1 id="description">
++ <title>DESCRIPTION</title>
++ <para>This manual page documents briefly the
++ <command>&dhpackage;</command> and <command>bar</command>
++ commands.</para>
++ <para>This manual page was written for the Debian distribution
++ because the original program does not have a manual page.
++ Instead, it has documentation in the GNU <citerefentry>
++ <refentrytitle>info</refentrytitle>
++ <manvolnum>1</manvolnum>
++ </citerefentry> format; see below.</para>
++ <para><command>&dhpackage;</command> is a program that...</para>
++ </refsect1>
++ <refsect1 id="options">
++ <title>OPTIONS</title>
++ <para>The program follows the usual GNU command line syntax,
++ with long options starting with two dashes (`--'). A summary of
++ options is included below. For a complete description, see the
++ <citerefentry>
++ <refentrytitle>info</refentrytitle>
++ <manvolnum>1</manvolnum>
++ </citerefentry> files.</para>
++ <variablelist>
++ <!-- Use the variablelist.term.separator and the
++ variablelist.term.break.after parameters to
++ control the term elements. -->
++ <varlistentry>
++ <term><option>-e <replaceable>this</replaceable></option></term>
++ <term><option>--example=<replaceable>that</replaceable></option></term>
++ <listitem>
++ <para>Does this and that.</para>
++ </listitem>
++ </varlistentry>
++ <varlistentry>
++ <term><option>-h</option></term>
++ <term><option>--help</option></term>
++ <listitem>
++ <para>Show summary of options.</para>
++ </listitem>
++ </varlistentry>
++ <varlistentry>
++ <term><option>-v</option></term>
++ <term><option>--version</option></term>
++ <listitem>
++ <para>Show version of program.</para>
++ </listitem>
++ </varlistentry>
++ </variablelist>
++ </refsect1>
++ <refsect1 id="files">
++ <title>FILES</title>
++ <variablelist>
++ <varlistentry>
++ <term><filename>/etc/foo.conf</filename></term>
++ <listitem>
++ <para>The system-wide configuration file to control the
++ behaviour of <application>&dhpackage;</application>. See
++ <citerefentry>
++ <refentrytitle>foo.conf</refentrytitle>
++ <manvolnum>5</manvolnum>
++ </citerefentry> for further details.</para>
++ </listitem>
++ </varlistentry>
++ <varlistentry>
++ <term><filename>${HOME}/.foo.conf</filename></term>
++ <listitem>
++ <para>The per-user configuration file to control the
++ behaviour of <application>&dhpackage;</application>. See
++ <citerefentry>
++ <refentrytitle>foo.conf</refentrytitle>
++ <manvolnum>5</manvolnum>
++ </citerefentry> for further details.</para>
++ </listitem>
++ </varlistentry>
++ </variablelist>
++ </refsect1>
++ <refsect1 id="environment">
++ <title>ENVIRONMENT</title>
++ <variablelist>
++ <varlistentry>
++ <term><envar>FOO_CONF</envar></term>
++ <listitem>
++ <para>If used, the defined file is used as configuration
++ file (see also <xref linkend="files"/>).</para>
++ </listitem>
++ </varlistentry>
++ </variablelist>
++ </refsect1>
++ <refsect1 id="diagnostics">
++ <title>DIAGNOSTICS</title>
++ <para>The following diagnostics may be issued
++ on <filename class="devicefile">stderr</filename>:</para>
++ <variablelist>
++ <varlistentry>
++ <term><errortext>Bad configuration file. Exiting.</errortext></term>
++ <listitem>
++ <para>The configuration file seems to contain a broken configuration
++ line. Use the <option>--verbose</option> option, to get more info.
++ </para>
++ </listitem>
++ </varlistentry>
++ </variablelist>
++ <para><command>&dhpackage;</command> provides some return codes, that can
++ be used in scripts:</para>
++ <segmentedlist>
++ <segtitle>Code</segtitle>
++ <segtitle>Diagnostic</segtitle>
++ <seglistitem>
++ <seg><errorcode>0</errorcode></seg>
++ <seg>Program exited successfully.</seg>
++ </seglistitem>
++ <seglistitem>
++ <seg><errorcode>1</errorcode></seg>
++ <seg>The configuration file seems to be broken.</seg>
++ </seglistitem>
++ </segmentedlist>
++ </refsect1>
++ <refsect1 id="bugs">
++ <!-- Or use this section to tell about upstream BTS. -->
++ <title>BUGS</title>
++ <para>The program is currently limited to only work
++ with the <package>foobar</package> library.</para>
++ <para>The upstreams <acronym>BTS</acronym> can be found
++ at <ulink url="http://bugzilla.foo.tld"/>.</para>
++ </refsect1>
++ <refsect1 id="see_also">
++ <title>SEE ALSO</title>
++ <!-- In alphabetical order. -->
++ <para><citerefentry>
++ <refentrytitle>bar</refentrytitle>
++ <manvolnum>1</manvolnum>
++ </citerefentry>, <citerefentry>
++ <refentrytitle>baz</refentrytitle>
++ <manvolnum>1</manvolnum>
++ </citerefentry>, <citerefentry>
++ <refentrytitle>foo.conf</refentrytitle>
++ <manvolnum>5</manvolnum>
++ </citerefentry></para>
++ <para>The programs are documented fully by <citetitle>The Rise and
++ Fall of a Fooish Bar</citetitle> available via the <citerefentry>
++ <refentrytitle>info</refentrytitle>
++ <manvolnum>1</manvolnum>
++ </citerefentry> system.</para>
++ </refsect1>
++</refentry>
++
--- /dev/null
--- /dev/null
++3.0 (quilt)
--- /dev/null
--- /dev/null
++Tests: python spm-encode
++Depends: libsentencepiece0, sentencepiece, python3-sentencepiece
--- /dev/null
--- /dev/null
++#!/bin/sh
++
++set -e
++
++VERSION=$(dpkg-query --show --showformat='${Version}' sentencepiece | cut -d- -f1)
++cat <<EOS > test_module.py
++import sentencepiece as spm
++
++print('VERSION={}'.format(spm.__version__))
++EOS
++
++PYVERSION=$(python3 test_module.py)
++if [ "VERSION=$VERSION" = "$PYVERSION" ]; then
++ echo "run python module"
++else
++ echo "Failed to get module version: <${PYVERSION}>"
++ exit 1
++fi
++rm -f test_module.py
--- /dev/null
--- /dev/null
++#!/bin/sh
++# autopkgtest: train a tiny model with spm_train on a fixed paragraph, encode
++# one sentence with spm_encode and compare the result with the expected
++# segmentation.
++# Prints "run spm_encode test" on success; prints the actual output and exits
++# 1 on mismatch.
++
++set -e
++
++# Run inside a private scratch directory that is removed on every exit path.
++# The training input and the generated tiny.* model files therefore never
++# touch the directory the test was started from, and are cleaned up on
++# failure as well.
++WORKDIR=$(mktemp -d)
++trap 'rm -rf "$WORKDIR"' EXIT
++cd "$WORKDIR"
++
++cat <<'EOS' > input.txt
++SentencePiece is an unsupervised text tokenizer and detokenizer mainly
++for Neural Network-based text generation systems where the vocabulary
++size is predetermined prior to the neural model
++training. SentencePiece implements subword units (e.g.,
++byte-pair-encoding (BPE) [Sennrich et al.]) and unigram language model
++[Kudo.]) with the extension of direct training from raw
++sentences. SentencePiece allows us to make a purely end-to-end system
++that does not depend on language-specific pre/postprocessing.
++EOS
++
++# Training output is discarded; only the resulting tiny.model is used.
++spm_train --input=input.txt --model_prefix=tiny --vocab_size=100 --character_coverage=1.0 >/dev/null 2>&1
++encoded=$(echo "I saw a girl with a telescope." | spm_encode --model=tiny.model)
++if [ "▁ I ▁s a w ▁a ▁ g ir l ▁w i t h ▁a ▁t el e s c o p e ." = "${encoded}" ]; then
++  echo "run spm_encode test"
++else
++  echo "Failed to spm_encode example: <${encoded}>"
++  exit 1
++fi
--- /dev/null
--- /dev/null
++version=4
++# Watch the GitHub tags page of google/sentencepiece for archives named
++# v<version>.tar.gz; filenamemangle renames the download to
++# sentencepiece-<version>-Source.tar.xz.
++# NOTE(review): the matched archive is .tar.gz while the mangled file name
++# ends in .tar.xz -- confirm that this extension mismatch is intended.
++opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%sentencepiece-$1-Source.tar.xz%" \
++ https://github.com/google/sentencepiece/tags \
++ (?:.*?/)?v(\d[\d.]*)\.tar\.gz debian uupdate