Supports ImmutableSentencePieceText from python module
author Taku Kudo <taku@google.com>
Mon, 1 Aug 2022 08:19:09 +0000 (17:19 +0900)
committer Kentaro Hayashi <kenhys@xdump.org>
Mon, 21 Nov 2022 13:43:46 +0000 (13:43 +0000)
Signed-off-by: Kentaro Hayashi <kenhys@gmail.com>
Gbp-Pq: Name 0012-Supports-ImmutableSentencePieceText-from-python-modu.patch

python/src/sentencepiece/__init__.py
python/src/sentencepiece/sentencepiece.i
python/src/sentencepiece/sentencepiece_wrap.cxx
python/test/sentencepiece_test.py
src/sentencepiece_processor.cc
src/sentencepiece_processor.h
src/sentencepiece_processor_test.cc

index 1543d32abe59cc97316717ffb54b64c4f5384806..69a9825821e894ac4834984ea6370ad3233a3aef 100644 (file)
@@ -61,6 +61,98 @@ class _SwigNonDynamicMeta(type):
     __setattr__ = _swig_setattr_nondynamic_class_variable(type.__setattr__)
 
 
+class ImmutableSentencePieceText_ImmutableSentencePiece(object):
+    thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag")
+    __repr__ = _swig_repr
+
+    def __init__(self):
+        _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_swiginit(self, _sentencepiece.new_ImmutableSentencePieceText_ImmutableSentencePiece())
+    __swig_destroy__ = _sentencepiece.delete_ImmutableSentencePieceText_ImmutableSentencePiece
+
+    def piece(self):
+        return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_piece(self)
+
+    def surface(self):
+        return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_surface(self)
+
+    def id(self):
+        return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_id(self)
+
+    def begin(self):
+        return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_begin(self)
+
+    def end(self):
+        return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_end(self)
+
+# Register ImmutableSentencePieceText_ImmutableSentencePiece in _sentencepiece:
+_sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_swigregister(ImmutableSentencePieceText_ImmutableSentencePiece)
+
+class ImmutableSentencePieceText(object):
+    thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag")
+    __repr__ = _swig_repr
+
+    def __init__(self):
+        _sentencepiece.ImmutableSentencePieceText_swiginit(self, _sentencepiece.new_ImmutableSentencePieceText())
+    __swig_destroy__ = _sentencepiece.delete_ImmutableSentencePieceText
+
+    def pieces_size(self):
+        return _sentencepiece.ImmutableSentencePieceText_pieces_size(self)
+
+    def text(self):
+        return _sentencepiece.ImmutableSentencePieceText_text(self)
+
+    def score(self):
+        return _sentencepiece.ImmutableSentencePieceText_score(self)
+
+    def SerializeAsString(self):
+        return _sentencepiece.ImmutableSentencePieceText_SerializeAsString(self)
+
+    def pieces(self, index):
+        return _sentencepiece.ImmutableSentencePieceText_pieces(self, index)
+
+    def __len__(self):
+      return self.pieces_size()
+
+    def __getitem__(self, i):
+      return self.pieces(i)
+
+    def __eq__(self, other):
+      return self.SerializeAsString() == other.SerializeAsString()
+
+
+# Register ImmutableSentencePieceText in _sentencepiece:
+_sentencepiece.ImmutableSentencePieceText_swigregister(ImmutableSentencePieceText)
+
+class ImmutableNBestSentencePieceText(object):
+    thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag")
+    __repr__ = _swig_repr
+
+    def __init__(self):
+        _sentencepiece.ImmutableNBestSentencePieceText_swiginit(self, _sentencepiece.new_ImmutableNBestSentencePieceText())
+    __swig_destroy__ = _sentencepiece.delete_ImmutableNBestSentencePieceText
+
+    def nbests_size(self):
+        return _sentencepiece.ImmutableNBestSentencePieceText_nbests_size(self)
+
+    def SerializeAsString(self):
+        return _sentencepiece.ImmutableNBestSentencePieceText_SerializeAsString(self)
+
+    def nbests(self, index):
+        return _sentencepiece.ImmutableNBestSentencePieceText_nbests(self, index)
+
+    def __len__(self):
+      return self.nbests_size()
+
+    def __getitem__(self, i):
+      return self.nbests(i)
+
+    def __eq__(self, other):
+      return self.SerializeAsString() == other.SerializeAsString()
+
+
+# Register ImmutableNBestSentencePieceText in _sentencepiece:
+_sentencepiece.ImmutableNBestSentencePieceText_swigregister(ImmutableNBestSentencePieceText)
+
 class SentencePieceProcessor(object):
     thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag")
     __repr__ = _swig_repr
@@ -87,12 +179,6 @@ class SentencePieceProcessor(object):
     def LoadVocabulary(self, filename, threshold):
         return _sentencepiece.SentencePieceProcessor_LoadVocabulary(self, filename, threshold)
 
-    def SampleEncodeAndScoreAsPieces(self, input, num_samples, theta, wor, include_best):
-        return _sentencepiece.SentencePieceProcessor_SampleEncodeAndScoreAsPieces(self, input, num_samples, theta, wor, include_best)
-
-    def SampleEncodeAndScoreAsIds(self, input, num_samples, theta, wor, include_best):
-        return _sentencepiece.SentencePieceProcessor_SampleEncodeAndScoreAsIds(self, input, num_samples, theta, wor, include_best)
-
     def CalculateEntropy(self, *args):
         return _sentencepiece.SentencePieceProcessor_CalculateEntropy(self, *args)
 
@@ -147,6 +233,9 @@ class SentencePieceProcessor(object):
     def _EncodeAsSerializedProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece):
         return _sentencepiece.SentencePieceProcessor__EncodeAsSerializedProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece)
 
+    def _EncodeAsImmutableProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece):
+        return _sentencepiece.SentencePieceProcessor__EncodeAsImmutableProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece)
+
     def _EncodeAsIdsBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece):
         return _sentencepiece.SentencePieceProcessor__EncodeAsIdsBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece)
 
@@ -156,6 +245,9 @@ class SentencePieceProcessor(object):
     def _EncodeAsSerializedProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece):
         return _sentencepiece.SentencePieceProcessor__EncodeAsSerializedProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece)
 
+    def _EncodeAsImmutableProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece):
+        return _sentencepiece.SentencePieceProcessor__EncodeAsImmutableProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece)
+
     def _DecodeIds(self, ids):
         return _sentencepiece.SentencePieceProcessor__DecodeIds(self, ids)
 
@@ -168,6 +260,12 @@ class SentencePieceProcessor(object):
     def _DecodePiecesAsSerializedProto(self, pieces):
         return _sentencepiece.SentencePieceProcessor__DecodePiecesAsSerializedProto(self, pieces)
 
+    def _DecodeIdsAsImmutableProto(self, ids):
+        return _sentencepiece.SentencePieceProcessor__DecodeIdsAsImmutableProto(self, ids)
+
+    def _DecodePiecesAsImmutableProto(self, pieces):
+        return _sentencepiece.SentencePieceProcessor__DecodePiecesAsImmutableProto(self, pieces)
+
     def _DecodeIdsBatch(self, ins, num_threads):
         return _sentencepiece.SentencePieceProcessor__DecodeIdsBatch(self, ins, num_threads)
 
@@ -180,6 +278,9 @@ class SentencePieceProcessor(object):
     def _DecodePiecesAsSerializedProtoBatch(self, ins, num_threads):
         return _sentencepiece.SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch(self, ins, num_threads)
 
+    def _DecodePiecesAsImmutableProtoBatch(self, ins, num_threads):
+        return _sentencepiece.SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch(self, ins, num_threads)
+
     def _NBestEncodeAsIds(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece):
         return _sentencepiece.SentencePieceProcessor__NBestEncodeAsIds(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece)
 
@@ -189,17 +290,26 @@ class SentencePieceProcessor(object):
     def _NBestEncodeAsSerializedProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece):
         return _sentencepiece.SentencePieceProcessor__NBestEncodeAsSerializedProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece)
 
-    def _SampleEncodeAndScoreAsIds(self, text, num_samples, theta, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece):
-        return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsIds(self, text, num_samples, theta, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece)
+    def _NBestEncodeAsImmutableProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece):
+        return _sentencepiece.SentencePieceProcessor__NBestEncodeAsImmutableProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece)
+
+    def _SampleEncodeAndScoreAsIds(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece):
+        return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsIds(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece)
 
-    def _SampleEncodeAndScoreAsPieces(self, text, num_samples, theta, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece):
-        return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsPieces(self, text, num_samples, theta, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece)
+    def _SampleEncodeAndScoreAsPieces(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece):
+        return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsPieces(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece)
 
-    def _CalculateEntropy(self, text, theta):
-        return _sentencepiece.SentencePieceProcessor__CalculateEntropy(self, text, theta)
+    def _SampleEncodeAndScoreAsSerializedProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece):
+        return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece)
 
-    def _CalculateEntropyBatch(self, ins, theta, num_threads):
-        return _sentencepiece.SentencePieceProcessor__CalculateEntropyBatch(self, ins, theta, num_threads)
+    def _SampleEncodeAndScoreAsImmutableProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece):
+        return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece)
+
+    def _CalculateEntropy(self, text, alpha):
+        return _sentencepiece.SentencePieceProcessor__CalculateEntropy(self, text, alpha)
+
+    def _CalculateEntropyBatch(self, ins, alpha, num_threads):
+        return _sentencepiece.SentencePieceProcessor__CalculateEntropyBatch(self, ins, alpha, num_threads)
 
     def Init(self,
              model_file=None,
@@ -319,9 +429,12 @@ class SentencePieceProcessor(object):
         if out_type is str:
           return self._EncodeAsPiecesBatch(input, num_threads, enable_sampling, nbest_size,
                                            alpha, add_bos, add_eos, reverse, emit_unk_piece)
-        if out_type == 'proto':
+        if out_type == 'serialized_proto' or out_type == 'proto':
           return self._EncodeAsSerializedProtoBatch(input, num_threads, enable_sampling, nbest_size,
                                                     alpha, add_bos, add_eos, reverse, emit_unk_piece)
+        if out_type == 'immutable_proto':
+          return self._EncodeAsImmutableProtoBatch(input, num_threads, enable_sampling, nbest_size,
+                                                   alpha, add_bos, add_eos, reverse, emit_unk_piece)
 
       if out_type is int:
         return self._EncodeAsIds(input, enable_sampling, nbest_size,
@@ -329,9 +442,12 @@ class SentencePieceProcessor(object):
       if out_type is str:
         return self._EncodeAsPieces(input, enable_sampling, nbest_size,
                                     alpha, add_bos, add_eos, reverse, emit_unk_piece)
-      if out_type == 'proto':
+      if out_type == 'serialized_proto' or out_type == 'proto':
         return self._EncodeAsSerializedProto(input, enable_sampling, nbest_size,
                                              alpha, add_bos, add_eos, reverse, emit_unk_piece)
+      if out_type == 'immutable_proto':
+        return self._EncodeAsImmutableProto(input, enable_sampling, nbest_size,
+                                            alpha, add_bos, add_eos, reverse, emit_unk_piece)
 
       raise RuntimeError('unknown out_type={}'.format(out_type))
       return None
@@ -346,7 +462,11 @@ class SentencePieceProcessor(object):
 
 
     def EncodeAsSerializedProto(self, input, **kwargs):
-      return self.Encode(input=input, out_type='proto', **kwargs)
+      return self.Encode(input=input, out_type='serialized_proto', **kwargs)
+
+
+    def EncodeAsImmutableProto(self, input, **kwargs):
+      return self.Encode(input=input, out_type='immutable_proto', **kwargs)
 
 
     def SampleEncodeAsPieces(self, input, nbest_size=None, alpha=None, **kwargs):
@@ -361,7 +481,12 @@ class SentencePieceProcessor(object):
 
     def SampleEncodeAsSerializedProto(self, input, nbest_size=None, alpha=None, **kwargs):
       return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha,
-                         out_type='proto', enable_sampling=True, **kwargs)
+                         out_type='serialized_proto', enable_sampling=True, **kwargs)
+
+
+    def SampleEncodeAsImmutableProto(self, input, nbest_size=None, alpha=None, **kwargs):
+      return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha,
+                         out_type='immutable_proto', enable_sampling=True, **kwargs)
 
 
     def NBestEncode(self,
@@ -407,9 +532,12 @@ class SentencePieceProcessor(object):
         if out_type is str:
           return self._NBestEncodeAsPieces(text, nbest_size,
                                            add_bos, add_eos, reverse, emit_unk_piece)
-        if out_type == 'proto':
+        if out_type == 'serialized_proto' or out_type == 'proto':
           return self._NBestEncodeAsSerializedProto(text, nbest_size,
                                                     add_bos, add_eos, reverse, emit_unk_piece)
+        if out_type == 'immutable_proto':
+          return self._NBestEncodeAsImmutableProto(text, nbest_size,
+                                                   add_bos, add_eos, reverse, emit_unk_piece)
 
       if type(input) is list:
         return [_encode(n) for n in input]
@@ -429,7 +557,12 @@ class SentencePieceProcessor(object):
 
     def NBestEncodeAsSerializedProto(self, input, nbest_size=None, **kwargs):
       return self.NBestEncode(input=input, nbest_size=nbest_size,
-                              out_type='proto', **kwargs)
+                              out_type='serialized_proto', **kwargs)
+
+
+    def NBestEncodeAsImmutableProto(self, input, nbest_size=None, **kwargs):
+      return self.NBestEncode(input=input, nbest_size=nbest_size,
+                              out_type='immutable_proto', **kwargs)
 
 
     def SampleEncodeAndScore(self,
@@ -440,20 +573,20 @@ class SentencePieceProcessor(object):
                              reverse=None,
                              emit_unk_piece=None,
                              num_samples=None,
-                             theta=None,
+                             alpha=None,
                              wor=None,
                              include_best=None):
       """SampleEncodeAndScore text input to segmented ids or tokens.
 
         Args:
         input: input string. accepsts list of string.
-        out_type: output type. int or str or 'proto'.
+        out_type: output type. int or str or 'serialized_proto' or 'immutable_proto'
         add_bos: Add <s> to the result (Default = false)
         add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
         reverse: Reverses the tokenized sequence (Default = false)
         emit_unk_piece: Emits the unk literal string (Default = false)
         num_samples: How many samples to return (Default = 1)
-        theta: inverse temperature for sampling
+        alpha: inverse temperature for sampling
         wor: whether to sample without replacement (Default = false)
         include_best: whether to include the best tokenization, requires wor=True (Default = false)
       """
@@ -470,8 +603,8 @@ class SentencePieceProcessor(object):
         emit_unk_piece = self._emit_unk_piece
       if num_samples is None:
         num_samples = 1
-      if theta is None:
-        theta = 1.
+      if alpha is None:
+        alpha = 1.
       if wor is None:
         wor = False
       if include_best is None:
@@ -486,10 +619,10 @@ class SentencePieceProcessor(object):
 
       def _encode(text):
         if out_type is int:
-          return self._SampleEncodeAndScoreAsIds(text, num_samples, theta, wor, include_best,
+          return self._SampleEncodeAndScoreAsIds(text, num_samples, alpha, wor, include_best,
                                                  add_bos, add_eos, reverse, emit_unk_piece)
         else:
-          return self._SampleEncodeAndScoreAsPieces(text, num_samples, theta, wor, include_best,
+          return self._SampleEncodeAndScoreAsPieces(text, num_samples, alpha, wor, include_best,
                                                     add_bos, add_eos, reverse, emit_unk_piece)
 
       if type(input) is list:
@@ -502,7 +635,7 @@ class SentencePieceProcessor(object):
       """Decode processed id or token sequences.
 
       Args:
-        out_type: output type. str or 'proto' (Default = str)
+        out_type: output type. str or 'serialized_proto' or 'immutable_proto' (Default = str)
         num_threads: the number of threads used in the batch processin (Default = 1).
       """
 
@@ -533,7 +666,7 @@ class SentencePieceProcessor(object):
             if type(input[0][0]) is str:
              return self._DecodePiecesBatch(input, num_threads)
 
-      if out_type == 'proto':
+      if out_type == 'serialized_proto' or out_type == 'proto':  # keep the legacy 'proto' alias, as Encode/NBestEncode do
         if type(input) is int:
           return self._DecodeIdsAsSerializedProto([input])
         if type(input) is str:
@@ -552,6 +685,25 @@ class SentencePieceProcessor(object):
              return self._DecodePiecesAsSerializedProtoBatch(input, num_threads)
 
 
+      if out_type == 'immutable_proto':
+        if type(input) is int:
+          return self._DecodeIdsAsImmutableProto([input])
+        if type(input) is str:
+          return self._DecodePiecesAsImmutableProto([input])
+
+        if type(input) is list:
+          if len(input) == 0 or type(input[0]) is int:
+            return self._DecodeIdsAsImmutableProto(input)
+          if type(input[0]) is str:
+            return self._DecodePiecesAsImmutableProto(input)
+
+          if type(input[0]) is list:
+            if len(input[0]) == 0 or type(input[0][0]) is int:
+             return self._DecodeIdsAsImmutableProtoBatch(input, num_threads)  # NOTE(review): no _DecodeIdsAsImmutableProtoBatch wrapper is visible in this diff (only the Pieces variant is added) - confirm it exists
+            if type(input[0][0]) is str:
+             return self._DecodePiecesAsImmutableProtoBatch(input, num_threads)
+
+
       raise RuntimeError('unknown output or input type')
       return None
 
@@ -564,24 +716,32 @@ class SentencePieceProcessor(object):
       return self.Decode(input=input, out_type=out_type, **kwargs)
 
 
-    def DecodePiecesAsSerializedProto(self, input, out_type='proto', **kwargs):
+    def DecodePiecesAsSerializedProto(self, input, out_type='serialized_proto', **kwargs):
+      return self.Decode(input=input, out_type=out_type, **kwargs)
+
+
+    def DecodeIdsAsSerializedProto(self, input, out_type='serialized_proto', **kwargs):
+      return self.Decode(input=input, out_type=out_type, **kwargs)
+
+
+    def DecodePiecesAsImmutableProto(self, input, out_type='immutable_proto', **kwargs):
       return self.Decode(input=input, out_type=out_type, **kwargs)
 
 
-    def DecodeIdsAsSerializedProto(self, input, out_type='proto', **kwargs):
+    def DecodeIdsAsImmutableProto(self, input, out_type='immutable_proto', **kwargs):
       return self.Decode(input=input, out_type=out_type, **kwargs)
 
 
-    def CalculateEntropy(self, input, theta, num_threads=None):
+    def CalculateEntropy(self, input, alpha, num_threads=None):
       """Calculate sentence entropy"""
       if type(input) is list:
         if num_threads is None:
           num_threads = self._num_threads
         if num_threads is None or type(num_threads) is not int:
           raise RuntimeError('num_threads must be int')
-        return self._CalculateEntropyBatch(input, theta, num_threads)
+        return self._CalculateEntropyBatch(input, alpha, num_threads)
 
-      return self._CalculateEntropy(input, theta)
+      return self._CalculateEntropy(input, alpha)
 
 
     def piece_size(self):
index 40373ce94ba53e5fbea3a8b4610305f01486cd53..1e2e1e0880489f20327cfd3c051c9da3a3ba5d29 100644 (file)
@@ -166,7 +166,17 @@ inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
   if (add_bos || add_eos || reverse || emit_unk_piece) {
     throw sentencepiece::util::Status(
         sentencepiece::util::StatusCode::kUnimplemented,
-        "add_bos, add_eos, reverse, and emit_unk_piece is not supported in AsSerialize API");
+        "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API");
+  }
+}
+
+inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
+                       sentencepiece::ImmutableSentencePieceText *proto,
+                       bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) {
+  if (add_bos || add_eos || reverse || emit_unk_piece) {
+    throw sentencepiece::util::Status(
+        sentencepiece::util::StatusCode::kUnimplemented,
+        "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API");
   }
 }
 
@@ -216,7 +226,7 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
 #define DEFINE_ENCODE_BATCH_FUNC_IMPL(FuncName, InType, OutType)        \
   std::vector<OutType> outs(ins.size());                                \
-  InitNumThreads(ins, &num_threads);                                  \
+  InitNumThreads(ins, &num_threads);                                    \
   {                                                                     \
     ThreadPool pool(ins.size());                                        \
     for (int n = 0;  n < num_threads; ++n) {                            \
@@ -237,7 +247,7 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
 #define DEFINE_DECODE_BATCH_FUNC_IMPL(FuncName, InType, OutType)        \
   std::vector<OutType> outs(ins.size());                                \
-  InitNumThreads(ins, &num_threads);                                  \
+  InitNumThreads(ins, &num_threads);                                    \
   {                                                                     \
     ThreadPool pool(ins.size());                                        \
     for (int n = 0;  n < num_threads; ++n) {                            \
@@ -264,6 +274,8 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
   }
 }
 
+%apply unsigned int { uint32_t }
+
 %ignore sentencepiece::util::Status;
 %ignore sentencepiece::util::StatusCode;
 %ignore absl::string_view;
@@ -272,32 +284,48 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 %ignore sentencepiece::NormalizerSpec;
 %ignore sentencepiece::TrainerSpec;
 %ignore sentencepiece::SentencePieceProcessor::status;
+%ignore sentencepiece::ImmutableSentencePieceText::mutable_proto;
+%ignore sentencepiece::ImmutableSentencePieceText::pieces() const;
+%ignore sentencepiece::ImmutableNBestSentencePieceText::mutable_proto;
+%ignore sentencepiece::ImmutableNBestSentencePieceText::nbests() const;
 
 %ignore sentencepiece::SentencePieceProcessor::Encode;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncode;
+%ignore sentencepiece::SentencePieceProcessor::NBestEncode;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScore;
+%ignore sentencepiece::SentencePieceProcessor::Decode;
+
 %ignore sentencepiece::SentencePieceProcessor::EncodeAsPieces;
 %ignore sentencepiece::SentencePieceProcessor::EncodeAsIds;
-%ignore sentencepiece::SentencePieceProcessor::EncodeAsSerializedProto;
-%ignore sentencepiece::SentencePieceProcessor::SampleEncode;
 %ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsIds;
 %ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsPieces;
-%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsSerializedProto;
-%ignore sentencepiece::SentencePieceProcessor::NBestEncode;
-%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsPieces;
 %ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsIds;
-%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsSerializedProto;
-%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScore;
-
-%ignore sentencepiece::SentencePieceProcessor::Decode;
+%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsPieces;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsIds;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsPieces;
 %ignore sentencepiece::SentencePieceProcessor::DecodeIds;
 %ignore sentencepiece::SentencePieceProcessor::DecodePieces;
+
+%ignore sentencepiece::SentencePieceProcessor::EncodeAsSerializedProto;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsSerializedProto;
+%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsSerializedProto;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsSerializedProto;
 %ignore sentencepiece::SentencePieceProcessor::DecodePiecesAsSerializedProto;
 %ignore sentencepiece::SentencePieceProcessor::DecodeIdsAsSerializedProto;
 
+%ignore sentencepiece::SentencePieceProcessor::EncodeAsImmutableProto;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsImmutableProto;
+%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsImmutableProto;
+%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsImmutableProto;
+%ignore sentencepiece::SentencePieceProcessor::DecodePiecesAsImmutableProto;
+%ignore sentencepiece::SentencePieceProcessor::DecodeIdsAsImmutableProto;
+
 %ignore sentencepiece::SentencePieceProcessor::model_proto;
 %ignore sentencepiece::SentencePieceProcessor::Load;
 %ignore sentencepiece::SentencePieceProcessor::LoadOrDie;
 %ignore sentencepiece::pretokenizer::PretokenizerForTrainingInterface;
 %ignore sentencepiece::SentenceIterator;
+%ignore sentencepiece::ConvertToUnicodeSpans;
 %ignore sentencepiece::SentencePieceTrainer::Train;
 %ignore sentencepiece::SentencePieceTrainer::GetNormalizerSpec;
 %ignore sentencepiece::SentencePieceTrainer::PopulateNormalizerSpec;
@@ -351,6 +379,19 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
     return proto;
   }
 
+  sentencepiece::ImmutableSentencePieceText
+      _EncodeAsImmutableProto(absl::string_view text,
+                              bool enable_sampling,
+                              int nbest_size, float alpha,
+                              bool add_bos, bool add_eos, bool reverse,
+                              bool emit_unk_piece) const {
+    auto proto = enable_sampling ?
+                 $self->SampleEncodeAsImmutableProto(text, nbest_size, alpha) :
+                 $self->EncodeAsImmutableProto(text);
+    RewriteIds(*$self, &proto, add_bos, add_eos, reverse, emit_unk_piece);
+    return proto;
+  }
+
   /////////////////////////////////////////////////////////////////////////////
   // EncodeAs* (Batch request)
   std::vector<std::vector<int>> _EncodeAsIdsBatch(
@@ -381,6 +422,17 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
                                   sentencepiece::util::bytes);
   }
 
+  std::vector<sentencepiece::ImmutableSentencePieceText>
+      _EncodeAsImmutableProtoBatch(
+      const std::vector<absl::string_view> &ins, int num_threads,
+      bool enable_sampling, int nbest_size, float alpha,
+      bool add_bos, bool add_eos, bool reverse,
+      bool emit_unk_piece) const {
+    DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsImmutableProto,
+                                  absl::string_view,
+                                  sentencepiece::ImmutableSentencePieceText);
+  }
+
   /////////////////////////////////////////////////////////////////////////////
   // DecodeAs* (Single request)
   std::string _DecodeIds(const std::vector<int> &ids) const {
@@ -404,6 +456,18 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
     return $self->DecodePiecesAsSerializedProto(pieces);
   }
 
+  sentencepiece::ImmutableSentencePieceText _DecodeIdsAsImmutableProto(
+      const std::vector<int> &ids) const {
+    CheckIds(ids, $self->GetPieceSize());
+    return $self->DecodeIdsAsImmutableProto(ids);
+  }
+
+  sentencepiece::ImmutableSentencePieceText _DecodePiecesAsImmutableProto(
+      const std::vector<absl::string_view> &pieces) const {
+    CheckIds(pieces, $self->GetPieceSize());
+    return $self->DecodePiecesAsImmutableProto(pieces);
+  }
+
   /////////////////////////////////////////////////////////////////////////////
   // DecodeAs* (Batch request)
   std::vector<std::string> _DecodeIdsBatch(
@@ -428,6 +492,13 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
                                   sentencepiece::util::bytes);
   }
 
+  std::vector<sentencepiece::ImmutableSentencePieceText>
+      _DecodePiecesAsImmutableProtoBatch(
+          const std::vector<std::vector<absl::string_view>> &ins, int num_threads) const {
+    DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePiecesAsImmutableProto, std::string,
+                                  sentencepiece::ImmutableSentencePieceText);
+  }
+
   ////////////////////////////////////////////////////////////////////////////
   // NBestEncodeAs* (Single request)
   std::vector<std::vector<int>>
@@ -454,25 +525,37 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
     return piecess;
   }
 
-  sentencepiece::util::bytes _NBestEncodeAsSerializedProto(absl::string_view text,
-                                                           int nbest_size,
-                                                           bool add_bos, bool add_eos, bool reverse,
-                                                           bool emit_unk_piece) const {
+  sentencepiece::util::bytes
+      _NBestEncodeAsSerializedProto(absl::string_view text,
+                                    int nbest_size,
+                                    bool add_bos, bool add_eos, bool reverse,
+                                    bool emit_unk_piece) const {
     RewriteIds(*$self, static_cast<sentencepiece::util::bytes *>(nullptr),
                add_bos, add_eos, reverse, emit_unk_piece);
     return $self->NBestEncodeAsSerializedProto(text, nbest_size);
   }
 
+  sentencepiece::ImmutableNBestSentencePieceText
+      _NBestEncodeAsImmutableProto(absl::string_view text,
+                                   int nbest_size,
+                                   bool add_bos, bool add_eos, bool reverse,
+                                   bool emit_unk_piece) const {  // n-best encode of text, returned as an immutable proto
+    RewriteIds(*$self, static_cast<sentencepiece::ImmutableSentencePieceText *>(nullptr),  // the null pointer only selects the immutable-proto overload
+               add_bos, add_eos, reverse, emit_unk_piece);  // that overload throws kUnimplemented if any of the four flags is set
+    return $self->NBestEncodeAsImmutableProto(text, nbest_size);
+  }
+
+
   /////////////////////////////////////////////////////////////////////////////
   // SampleEncodeAndScoreAs* (Single request)
   std::vector<std::pair<std::vector<int>, float>>
       _SampleEncodeAndScoreAsIds(absl::string_view text,
-                                 int num_samples, float theta, bool wor,
+                                 int num_samples, float alpha, bool wor,
                                  bool include_best,
                                  bool add_bos, bool add_eos, bool reverse,
                                  bool emit_unk_piece) const {
     auto idss = $self->SampleEncodeAndScoreAsIds(text, num_samples,
-                                                 theta, wor, include_best);
+                                                 alpha, wor, include_best);
     for (auto &ids : idss) {
       RewriteIds(*$self, &ids.first, add_bos, add_eos, reverse, emit_unk_piece);
     }
@@ -481,25 +564,50 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
   std::vector<std::pair<std::vector<std::string>, float>>
       _SampleEncodeAndScoreAsPieces(absl::string_view text,
-                                    int num_samples, float theta, bool wor,
+                                    int num_samples, float alpha, bool wor,
                                     bool include_best,
                                     bool add_bos, bool add_eos, bool reverse,
                                     bool emit_unk_piece) const {
     auto piecess = $self->SampleEncodeAndScoreAsPieces(text, num_samples,
-                                                       theta, wor, include_best);
+                                                       alpha, wor, include_best);
     for (auto &pieces : piecess) {
       RewriteIds(*$self, &pieces.first, add_bos, add_eos, reverse, emit_unk_piece);
     }
     return piecess;
   }
 
+  sentencepiece::util::bytes
+      _SampleEncodeAndScoreAsSerializedProto(absl::string_view text,
+                                             int num_samples, float alpha, bool wor,
+                                             bool include_best,
+                                             bool add_bos, bool add_eos, bool reverse,
+                                             bool emit_unk_piece) const {  // wraps SampleEncodeAndScoreAsSerializedProto for the Python binding
+    RewriteIds(*$self, static_cast<sentencepiece::util::bytes *>(nullptr),  // the null pointer only selects the serialized-proto overload
+               add_bos, add_eos, reverse, emit_unk_piece);  // NOTE(review): presumably throws kUnimplemented if any flag is set, like the immutable-proto overload -- confirm
+    return $self->SampleEncodeAndScoreAsSerializedProto(text, num_samples,
+                                                        alpha, wor, include_best);
+  }
+
+  sentencepiece::ImmutableNBestSentencePieceText
+      _SampleEncodeAndScoreAsImmutableProto(absl::string_view text,
+                                            int num_samples, float alpha, bool wor,
+                                            bool include_best,
+                                            bool add_bos, bool add_eos, bool reverse,
+                                            bool emit_unk_piece) const {  // wraps SampleEncodeAndScoreAsImmutableProto for the Python binding
+    RewriteIds(*$self, static_cast<sentencepiece::ImmutableSentencePieceText *>(nullptr),  // select the immutable-proto overload, matching _NBestEncodeAsImmutableProto
+               add_bos, add_eos, reverse, emit_unk_piece);  // that overload throws kUnimplemented if any of the four flags is set
+    return $self->SampleEncodeAndScoreAsImmutableProto(text, num_samples,
+                                                       alpha, wor, include_best);
+  }
+
+
   // Calculate Entropy
-  float _CalculateEntropy(absl::string_view text, float theta)  {
-    return $self->CalculateEntropy(text, theta);
+  float _CalculateEntropy(absl::string_view text, float alpha)  {
+    return $self->CalculateEntropy(text, alpha);
   }
 
   std::vector<float> _CalculateEntropyBatch(const std::vector<absl::string_view> &ins,
-                                            float theta, int num_threads)  {
+                                            float alpha, int num_threads)  {
     std::vector<float> outs(ins.size());
     InitNumThreads(ins, &num_threads);
     {
@@ -507,7 +615,7 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
       for (int n = 0;  n < num_threads; ++n) {
         pool.Schedule([&, n]() {
             for (size_t i = n; i < ins.size(); i += num_threads) {
-              outs[i] = self->CalculateEntropy(ins[i], theta);
+              outs[i] = self->CalculateEntropy(ins[i], alpha);
           }
         });
       }
@@ -634,9 +742,12 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
       if out_type is str:
         return self._EncodeAsPiecesBatch(input, num_threads, enable_sampling, nbest_size,
                                          alpha, add_bos, add_eos, reverse, emit_unk_piece)
-      if out_type == 'proto':
+      if out_type == 'serialized_proto' or out_type == 'proto':
         return self._EncodeAsSerializedProtoBatch(input, num_threads, enable_sampling, nbest_size,
                                                   alpha, add_bos, add_eos, reverse, emit_unk_piece)
+      if out_type == 'immutable_proto':
+        return self._EncodeAsImmutableProtoBatch(input, num_threads, enable_sampling, nbest_size,
+                                                 alpha, add_bos, add_eos, reverse, emit_unk_piece)
 
     if out_type is int:
       return self._EncodeAsIds(input, enable_sampling, nbest_size,
@@ -644,9 +755,12 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
     if out_type is str:
       return self._EncodeAsPieces(input, enable_sampling, nbest_size,
                                   alpha, add_bos, add_eos, reverse, emit_unk_piece)
-    if out_type == 'proto':
+    if out_type == 'serialized_proto' or out_type == 'proto':
       return self._EncodeAsSerializedProto(input, enable_sampling, nbest_size,
                                            alpha, add_bos, add_eos, reverse, emit_unk_piece)
+    if out_type == 'immutable_proto':
+      return self._EncodeAsImmutableProto(input, enable_sampling, nbest_size,
+                                          alpha, add_bos, add_eos, reverse, emit_unk_piece)
 
     raise RuntimeError('unknown out_type={}'.format(out_type))
     return None
@@ -661,7 +775,11 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
 
   def EncodeAsSerializedProto(self, input, **kwargs):
-    return self.Encode(input=input, out_type='proto', **kwargs)
+    return self.Encode(input=input, out_type='serialized_proto', **kwargs)
+
+
+  def EncodeAsImmutableProto(self, input, **kwargs):
+    return self.Encode(input=input, out_type='immutable_proto', **kwargs)
 
 
   def SampleEncodeAsPieces(self, input, nbest_size=None, alpha=None, **kwargs):
@@ -676,7 +794,12 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
   def SampleEncodeAsSerializedProto(self, input, nbest_size=None, alpha=None, **kwargs):
     return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha,
-                       out_type='proto', enable_sampling=True, **kwargs)
+                       out_type='serialized_proto', enable_sampling=True, **kwargs)
+
+
+  def SampleEncodeAsImmutableProto(self, input, nbest_size=None, alpha=None, **kwargs):
+    return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha,
+                       out_type='immutable_proto', enable_sampling=True, **kwargs)
 
 
   def NBestEncode(self,
@@ -722,9 +845,12 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
       if out_type is str:
         return self._NBestEncodeAsPieces(text, nbest_size,
                                          add_bos, add_eos, reverse, emit_unk_piece)
-      if out_type == 'proto':
+      if out_type == 'serialized_proto' or out_type == 'proto':
         return self._NBestEncodeAsSerializedProto(text, nbest_size,
                                                   add_bos, add_eos, reverse, emit_unk_piece)
+      if out_type == 'immutable_proto':
+        return self._NBestEncodeAsImmutableProto(text, nbest_size,
+                                                 add_bos, add_eos, reverse, emit_unk_piece)
 
     if type(input) is list:
       return [_encode(n) for n in input]
@@ -744,7 +870,12 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
   def NBestEncodeAsSerializedProto(self, input, nbest_size=None, **kwargs):
     return self.NBestEncode(input=input, nbest_size=nbest_size,
-                            out_type='proto', **kwargs)
+                            out_type='serialized_proto', **kwargs)
+
+
+  def NBestEncodeAsImmutableProto(self, input, nbest_size=None, **kwargs):
+    return self.NBestEncode(input=input, nbest_size=nbest_size,
+                            out_type='immutable_proto', **kwargs)
 
 
   def SampleEncodeAndScore(self,
@@ -755,20 +886,20 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
                            reverse=None,
                            emit_unk_piece=None,
                            num_samples=None,
-                           theta=None,
+                           alpha=None,
                            wor=None,
                            include_best=None):
     """SampleEncodeAndScore text input to segmented ids or tokens.
 
       Args:
       input: input string. accepsts list of string.
-      out_type: output type. int or str or 'proto'.
+      out_type: output type. int or str or 'serialized_proto' or 'immutable_proto'
       add_bos: Add <s> to the result (Default = false)
       add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
       reverse: Reverses the tokenized sequence (Default = false)
       emit_unk_piece: Emits the unk literal string (Default = false)
       num_samples: How many samples to return (Default = 1)
-      theta: inverse temperature for sampling
+      alpha: inverse temperature for sampling
       wor: whether to sample without replacement (Default = false)
       include_best: whether to include the best tokenization, requires wor=True (Default = false)
     """
@@ -785,8 +916,8 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
       emit_unk_piece = self._emit_unk_piece
     if num_samples is None:
       num_samples = 1
-    if theta is None:
-      theta = 1.
+    if alpha is None:
+      alpha = 1.
     if wor is None:
       wor = False
     if include_best is None:
@@ -801,10 +932,16 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
     def _encode(text):
       if out_type is int:
-        return self._SampleEncodeAndScoreAsIds(text, num_samples, theta, wor, include_best,
+        return self._SampleEncodeAndScoreAsIds(text, num_samples, alpha, wor, include_best,
                                                add_bos, add_eos, reverse, emit_unk_piece)
+      elif out_type == 'serialized_proto' or out_type == 'proto':
+        return self._SampleEncodeAndScoreAsSerializedProto(text, num_samples, alpha, wor, include_best,
+                                                           add_bos, add_eos, reverse, emit_unk_piece)
+      elif out_type == 'immutable_proto':
+        return self._SampleEncodeAndScoreAsImmutableProto(text, num_samples, alpha, wor, include_best,
+                                                          add_bos, add_eos, reverse, emit_unk_piece)
       else:
-        return self._SampleEncodeAndScoreAsPieces(text, num_samples, theta, wor, include_best,
+        return self._SampleEncodeAndScoreAsPieces(text, num_samples, alpha, wor, include_best,
                                                   add_bos, add_eos, reverse, emit_unk_piece)
 
     if type(input) is list:
@@ -817,7 +948,7 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
     """Decode processed id or token sequences.
 
     Args:
-      out_type: output type. str or 'proto' (Default = str)
+      out_type: output type. str or 'serialized_proto' or 'immutable_proto' (Default = str)
       num_threads: the number of threads used in the batch processin (Default = 1).
     """
 
@@ -848,7 +979,7 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
           if type(input[0][0]) is str:
            return self._DecodePiecesBatch(input, num_threads)
 
-    if out_type == 'proto':
+    if out_type == 'serialized_proto' or out_type == 'proto':
       if type(input) is int:
         return self._DecodeIdsAsSerializedProto([input])
       if type(input) is str:
@@ -867,6 +998,25 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
            return self._DecodePiecesAsSerializedProtoBatch(input, num_threads)
 
 
+    if out_type == 'immutable_proto':
+      if type(input) is int:
+        return self._DecodeIdsAsImmutableProto([input])
+      if type(input) is str:
+        return self._DecodePiecesAsImmutableProto([input])
+
+      if type(input) is list:
+        if len(input) == 0 or type(input[0]) is int:
+          return self._DecodeIdsAsImmutableProto(input)
+        if type(input[0]) is str:
+          return self._DecodePiecesAsImmutableProto(input)
+
+        if type(input[0]) is list:
+          if len(input[0]) == 0 or type(input[0][0]) is int:
+           return self._DecodeIdsAsImmutableProtoBatch(input, num_threads)
+          if type(input[0][0]) is str:
+           return self._DecodePiecesAsImmutableProtoBatch(input, num_threads)
+
+
     raise RuntimeError('unknown output or input type')
     return None
 
@@ -879,24 +1029,32 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
     return self.Decode(input=input, out_type=out_type, **kwargs)
 
 
-  def DecodePiecesAsSerializedProto(self, input, out_type='proto', **kwargs):
+  def DecodePiecesAsSerializedProto(self, input, out_type='serialized_proto', **kwargs):
+    return self.Decode(input=input, out_type=out_type, **kwargs)
+
+
+  def DecodeIdsAsSerializedProto(self, input, out_type='serialized_proto', **kwargs):
+    return self.Decode(input=input, out_type=out_type, **kwargs)
+
+
+  def DecodePiecesAsImmutableProto(self, input, out_type='immutable_proto', **kwargs):
     return self.Decode(input=input, out_type=out_type, **kwargs)
 
 
-  def DecodeIdsAsSerializedProto(self, input, out_type='proto', **kwargs):
+  def DecodeIdsAsImmutableProto(self, input, out_type='immutable_proto', **kwargs):
     return self.Decode(input=input, out_type=out_type, **kwargs)
 
 
-  def CalculateEntropy(self, input, theta, num_threads=None):
+  def CalculateEntropy(self, input, alpha, num_threads=None):
     """Calculate sentence entropy"""
     if type(input) is list:
       if num_threads is None:
         num_threads = self._num_threads
       if num_threads is None or type(num_threads) is not int:
         raise RuntimeError('num_threads must be int')
-      return self._CalculateEntropyBatch(input, theta, num_threads)
+      return self._CalculateEntropyBatch(input, alpha, num_threads)
 
-    return self._CalculateEntropy(input, theta)
+    return self._CalculateEntropy(input, alpha)
 
 
   def piece_size(self):
@@ -1028,6 +1186,50 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 }
 }
 
+%extend sentencepiece::ImmutableSentencePieceText {
+  ImmutableSentencePieceText_ImmutableSentencePiece pieces(int index) const {  // bounds-checked accessor for the index-th piece
+    if (index < 0 || index >= static_cast<int>($self->pieces_size())) {  // negative indices are rejected, not wrapped around
+      throw sentencepiece::util::Status(  // NOTE(review): how a thrown Status surfaces in Python depends on the exception handler, not shown here
+          sentencepiece::util::StatusCode::kOutOfRange,
+          "piece index is out of range.");
+    }
+    return $self->pieces(index);
+  }
+
+%pythoncode {
+  def __len__(self):
+    return self.pieces_size()
+
+  def __getitem__(self, i):
+    return self.pieces(i)
+
+  def __eq__(self, other):
+    return self.SerializeAsString() == other.SerializeAsString()
+}
+}
+
+%extend sentencepiece::ImmutableNBestSentencePieceText {
+  ImmutableSentencePieceText nbests(int index) const {  // bounds-checked accessor for the index-th n-best entry
+    if (index < 0 || index >= static_cast<int>($self->nbests_size())) {  // negative indices are rejected, not wrapped around
+      throw sentencepiece::util::Status(  // NOTE(review): how a thrown Status surfaces in Python depends on the exception handler, not shown here
+          sentencepiece::util::StatusCode::kOutOfRange,
+          "nbest index is out of range.");
+    }
+    return $self->nbests(index);
+  }
+
+%pythoncode {
+  def __len__(self):
+    return self.nbests_size()
+
+  def __getitem__(self, i):
+    return self.nbests(i)
+
+  def __eq__(self, other):
+    return self.SerializeAsString() == other.SerializeAsString()
+}
+}
+
 %typemap(out) std::vector<int> {
   $result = PyList_New($1.size());
   for (size_t i = 0; i < $1.size(); ++i) {
@@ -1277,6 +1479,14 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
   }
 }
 
+%typemap(out) std::vector<sentencepiece::ImmutableSentencePieceText> {
+  $result = PyList_New($1.size());  // result is a Python list with one slot per vector element
+  for (size_t i = 0; i < $1.size(); ++i) {
+    PyObject *obj = SWIG_NewPointerObj(new sentencepiece::ImmutableSentencePieceText($1.at(i)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0);  // heap copy of element i; SWIG_POINTER_OWN makes the Python object own it
+    PyList_SET_ITEM($result, i, obj);
+  }
+}
+
 %typemap(in) sentencepiece::SentenceIterator * {
   sentencepiece::SentenceIterator *out = nullptr;
   if (PyIter_Check($input)) {
@@ -1324,6 +1534,18 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
   delete $1;
 }
 
+%typemap(freearg) sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece {
+  delete $1;
+}
+
+%typemap(freearg) sentencepiece::ImmutableSentencePieceText {
+  delete $1;
+}
+
+%typemap(freearg) sentencepiece::ImmutableNBestSentencePieceText {
+  delete $1;
+}
+
 %include <sentencepiece_processor.h>
 %include <sentencepiece_trainer.h>
 
index 36ce38c34c101e6e02a6da862ed9553b8dbd09fb..9776b0f562a41c50b138dc908e1e6cd8f9b787f7 100644 (file)
@@ -2694,17 +2694,20 @@ SWIGINTERN PyObject *SWIG_PyStaticMethod_New(PyObject *SWIGUNUSEDPARM(self), PyO
 
 #define SWIGTYPE_p_char swig_types[0]
 #define SWIGTYPE_p_float swig_types[1]
-#define SWIGTYPE_p_sentencepiece__SentenceIterator swig_types[2]
-#define SWIGTYPE_p_sentencepiece__SentencePieceProcessor swig_types[3]
-#define SWIGTYPE_p_sentencepiece__SentencePieceTrainer swig_types[4]
-#define SWIGTYPE_p_std__string swig_types[5]
-#define SWIGTYPE_p_std__unordered_mapT_std__string_std__string_t swig_types[6]
-#define SWIGTYPE_p_std__vectorT_absl__string_view_t swig_types[7]
-#define SWIGTYPE_p_std__vectorT_int_t swig_types[8]
-#define SWIGTYPE_p_std__vectorT_std__vectorT_absl__string_view_t_t swig_types[9]
-#define SWIGTYPE_p_std__vectorT_std__vectorT_int_t_t swig_types[10]
-static swig_type_info *swig_types[12];
-static swig_module_info swig_module = {swig_types, 11, 0, 0, 0, 0};
+#define SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText swig_types[2]
+#define SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText swig_types[3]
+#define SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece swig_types[4]
+#define SWIGTYPE_p_sentencepiece__SentenceIterator swig_types[5]
+#define SWIGTYPE_p_sentencepiece__SentencePieceProcessor swig_types[6]
+#define SWIGTYPE_p_sentencepiece__SentencePieceTrainer swig_types[7]
+#define SWIGTYPE_p_std__string swig_types[8]
+#define SWIGTYPE_p_std__unordered_mapT_std__string_std__string_t swig_types[9]
+#define SWIGTYPE_p_std__vectorT_absl__string_view_t swig_types[10]
+#define SWIGTYPE_p_std__vectorT_int_t swig_types[11]
+#define SWIGTYPE_p_std__vectorT_std__vectorT_absl__string_view_t_t swig_types[12]
+#define SWIGTYPE_p_std__vectorT_std__vectorT_int_t_t swig_types[13]
+static swig_type_info *swig_types[15];
+static swig_module_info swig_module = {swig_types, 14, 0, 0, 0, 0};
 #define SWIG_TypeQuery(name) SWIG_TypeQueryModule(&swig_module, &swig_module, name)
 #define SWIG_MangledTypeQuery(name) SWIG_MangledTypeQueryModule(&swig_module, &swig_module, name)
 
@@ -2972,7 +2975,17 @@ inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
   if (add_bos || add_eos || reverse || emit_unk_piece) {
     throw sentencepiece::util::Status(
         sentencepiece::util::StatusCode::kUnimplemented,
-        "add_bos, add_eos, reverse, and emit_unk_piece is not supported in AsSerialize API");
+        "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API");
+  }
+}
+
+inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
+                       sentencepiece::ImmutableSentencePieceText *proto,
+                       bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) {
+  if (add_bos || add_eos || reverse || emit_unk_piece) {
+    throw sentencepiece::util::Status(
+        sentencepiece::util::StatusCode::kUnimplemented,
+        "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API");
   }
 }
 
@@ -3022,7 +3035,7 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
 #define DEFINE_ENCODE_BATCH_FUNC_IMPL(FuncName, InType, OutType)        \
   std::vector<OutType> outs(ins.size());                                \
-  InitNumThreads(ins, &num_threads);                                  \
+  InitNumThreads(ins, &num_threads);                                    \
   {                                                                     \
     ThreadPool pool(ins.size());                                        \
     for (int n = 0;  n < num_threads; ++n) {                            \
@@ -3043,7 +3056,7 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 
 #define DEFINE_DECODE_BATCH_FUNC_IMPL(FuncName, InType, OutType)        \
   std::vector<OutType> outs(ins.size());                                \
-  InitNumThreads(ins, &num_threads);                                  \
+  InitNumThreads(ins, &num_threads);                                    \
   {                                                                     \
     ThreadPool pool(ins.size());                                        \
     for (int n = 0;  n < num_threads; ++n) {                            \
@@ -3060,131 +3073,24 @@ inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
 }  // namespace
 
 
-SWIGINTERN swig_type_info*
-SWIG_pchar_descriptor(void)
+SWIGINTERNINLINE PyObject*
+  SWIG_From_unsigned_SS_int  (unsigned int value)
 {
-  static int init = 0;
-  static swig_type_info* info = 0;
-  if (!init) {
-    info = SWIG_TypeQuery("_p_char");
-    init = 1;
-  }
-  return info;
+  return PyInt_FromSize_t((size_t) value);
 }
 
 
-SWIGINTERN int
-SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
-{
-#if PY_VERSION_HEX>=0x03000000
-#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
-  if (PyBytes_Check(obj))
-#else
-  if (PyUnicode_Check(obj))
-#endif
-#else  
-  if (PyString_Check(obj))
-#endif
-  {
-    char *cstr; Py_ssize_t len;
-    int ret = SWIG_OK;
-#if PY_VERSION_HEX>=0x03000000
-#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
-    if (!alloc && cptr) {
-        /* We can't allow converting without allocation, since the internal
-           representation of string in Python 3 is UCS-2/UCS-4 but we require
-           a UTF-8 representation.
-           TODO(bhy) More detailed explanation */
-        return SWIG_RuntimeError;
-    }
-    obj = PyUnicode_AsUTF8String(obj);
-    if (!obj)
-      return SWIG_TypeError;
-    if (alloc)
-      *alloc = SWIG_NEWOBJ;
-#endif
-    if (PyBytes_AsStringAndSize(obj, &cstr, &len) == -1)
-      return SWIG_TypeError;
-#else
-    if (PyString_AsStringAndSize(obj, &cstr, &len) == -1)
-      return SWIG_TypeError;
-#endif
-    if (cptr) {
-      if (alloc) {
-       if (*alloc == SWIG_NEWOBJ) {
-         *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1)));
-         *alloc = SWIG_NEWOBJ;
-       } else {
-         *cptr = cstr;
-         *alloc = SWIG_OLDOBJ;
-       }
-      } else {
-#if PY_VERSION_HEX>=0x03000000
-#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
-       *cptr = PyBytes_AsString(obj);
-#else
-       assert(0); /* Should never reach here with Unicode strings in Python 3 */
-#endif
-#else
-       *cptr = SWIG_Python_str_AsChar(obj);
-        if (!*cptr)
-          ret = SWIG_TypeError;
-#endif
-      }
-    }
-    if (psize) *psize = len + 1;
-#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
-    Py_XDECREF(obj);
-#endif
-    return ret;
-  } else {
-#if defined(SWIG_PYTHON_2_UNICODE)
-#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
-#error "Cannot use both SWIG_PYTHON_2_UNICODE and SWIG_PYTHON_STRICT_BYTE_CHAR at once"
-#endif
-#if PY_VERSION_HEX<0x03000000
-    if (PyUnicode_Check(obj)) {
-      char *cstr; Py_ssize_t len;
-      if (!alloc && cptr) {
-        return SWIG_RuntimeError;
-      }
-      obj = PyUnicode_AsUTF8String(obj);
-      if (!obj)
-        return SWIG_TypeError;
-      if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
-        if (cptr) {
-          if (alloc) *alloc = SWIG_NEWOBJ;
-          *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1)));
-        }
-        if (psize) *psize = len + 1;
+  #define SWIG_From_long   PyInt_FromLong 
 
-        Py_XDECREF(obj);
-        return SWIG_OK;
-      } else {
-        Py_XDECREF(obj);
-      }
-    }
-#endif
-#endif
 
-    swig_type_info* pchar_descriptor = SWIG_pchar_descriptor();
-    if (pchar_descriptor) {
-      void* vptr = 0;
-      if (SWIG_ConvertPtr(obj, &vptr, pchar_descriptor, 0) == SWIG_OK) {
-       if (cptr) *cptr = (char *) vptr;
-       if (psize) *psize = vptr ? (strlen((char *)vptr) + 1) : 0;
-       if (alloc) *alloc = SWIG_OLDOBJ;
-       return SWIG_OK;
-      }
-    }
-  }
-  return SWIG_TypeError;
+SWIGINTERNINLINE PyObject* 
+SWIG_From_unsigned_SS_long  (unsigned long value)
+{
+  return (value > LONG_MAX) ?
+    PyLong_FromUnsignedLong(value) : PyInt_FromLong(static_cast< long >(value));
 }
 
 
-
-
-
 #include <limits.h>
 #if !defined(SWIG_NO_LLONG_MAX)
 # if !defined(LLONG_MAX) && defined(__GNUC__) && defined (__LONG_LONG_MAX__)
@@ -3195,6 +3101,47 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
 #endif
 
 
+#if defined(LLONG_MAX) && !defined(SWIG_LONG_LONG_AVAILABLE)
+#  define SWIG_LONG_LONG_AVAILABLE
+#endif
+
+
+#ifdef SWIG_LONG_LONG_AVAILABLE
+SWIGINTERNINLINE PyObject* 
+SWIG_From_unsigned_SS_long_SS_long  (unsigned long long value)
+{
+  return (value > LONG_MAX) ?
+    PyLong_FromUnsignedLongLong(value) : PyInt_FromLong(static_cast< long >(value));
+}
+#endif
+
+
+SWIGINTERNINLINE PyObject *
+SWIG_From_size_t  (size_t value)
+{    
+#ifdef SWIG_LONG_LONG_AVAILABLE
+  if (sizeof(size_t) <= sizeof(unsigned long)) {
+#endif
+    return SWIG_From_unsigned_SS_long  (static_cast< unsigned long >(value));
+#ifdef SWIG_LONG_LONG_AVAILABLE
+  } else {
+    /* assume sizeof(size_t) <= sizeof(unsigned long long) */
+    return SWIG_From_unsigned_SS_long_SS_long  (static_cast< unsigned long long >(value));
+  }
+#endif
+}
+
+
+  #define SWIG_From_double   PyFloat_FromDouble 
+
+
+SWIGINTERNINLINE PyObject *
+SWIG_From_float  (float value)
+{    
+  return SWIG_From_double  (value);
+}
+
+
 SWIGINTERN int
 SWIG_AsVal_double (PyObject *obj, double *val)
 {
@@ -3335,98 +3282,215 @@ SWIG_AsVal_int (PyObject * obj, int *val)
   return res;
 }
 
-
-/* Getting isfinite working pre C99 across multiple platforms is non-trivial. Users can provide SWIG_isfinite on older platforms. */
-#ifndef SWIG_isfinite
-/* isfinite() is a macro for C99 */
-# if defined(isfinite)
-#  define SWIG_isfinite(X) (isfinite(X))
-# elif defined(__cplusplus) && __cplusplus >= 201103L
-/* Use a template so that this works whether isfinite() is std::isfinite() or
- * in the global namespace.  The reality seems to vary between compiler
- * versions.
- *
- * Make sure namespace std exists to avoid compiler warnings.
- *
- * extern "C++" is required as this fragment can end up inside an extern "C" { } block
- */
-namespace std { }
-extern "C++" template<typename T>
-inline int SWIG_isfinite_func(T x) {
-  using namespace std;
-  return isfinite(x);
-}
-#  define SWIG_isfinite(X) (SWIG_isfinite_func(X))
-# elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2))
-#  define SWIG_isfinite(X) (__builtin_isfinite(X))
-# elif defined(__clang__) && defined(__has_builtin)
-#  if __has_builtin(__builtin_isfinite)
-#   define SWIG_isfinite(X) (__builtin_isfinite(X))
-#  endif
-# elif defined(_MSC_VER)
-#  define SWIG_isfinite(X) (_finite(X))
-# elif defined(__sun) && defined(__SVR4)
-#  include <ieeefp.h>
-#  define SWIG_isfinite(X) (finite(X))
-# endif
-#endif
-
-
-/* Accept infinite as a valid float value unless we are unable to check if a value is finite */
-#ifdef SWIG_isfinite
-# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX) && SWIG_isfinite(X))
-#else
-# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX))
-#endif
-
-
-SWIGINTERN int
-SWIG_AsVal_float (PyObject * obj, float *val)
-{
-  double v;
-  int res = SWIG_AsVal_double (obj, &v);
-  if (SWIG_IsOK(res)) {
-    if (SWIG_Float_Overflow_Check(v)) {
-      return SWIG_OverflowError;
-    } else {
-      if (val) *val = static_cast< float >(v);
+SWIGINTERN sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece sentencepiece_ImmutableSentencePieceText_pieces(sentencepiece::ImmutableSentencePieceText const *self,int index){
+    if (index < 0 || index >= static_cast<int>(self->pieces_size())) {
+      throw sentencepiece::util::Status(
+          sentencepiece::util::StatusCode::kOutOfRange,
+          "piece index is out of range.");
     }
-  }  
-  return res;
-}
-
+    return self->pieces(index);
+  }
+SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_ImmutableNBestSentencePieceText_nbests(sentencepiece::ImmutableNBestSentencePieceText const *self,int index){
+    if (index < 0 || index >= static_cast<int>(self->nbests_size())) {
+      throw sentencepiece::util::Status(
+          sentencepiece::util::StatusCode::kOutOfRange,
+          "nbest index is out of range.");
+    }
+    return self->nbests(index);
+  }
 
-SWIGINTERN int
-SWIG_AsVal_bool (PyObject *obj, bool *val)
+SWIGINTERN swig_type_info*
+SWIG_pchar_descriptor(void)
 {
-  int r;
-  if (!PyBool_Check(obj))
-    return SWIG_ERROR;
-  r = PyObject_IsTrue(obj);
-  if (r == -1)
-    return SWIG_ERROR;
-  if (val) *val = r ? true : false;
-  return SWIG_OK;
-}
-
-
-  #define SWIG_From_double   PyFloat_FromDouble 
-
-
-SWIGINTERNINLINE PyObject *
-SWIG_From_float  (float value)
-{    
-  return SWIG_From_double  (value);
+  static int init = 0;
+  static swig_type_info* info = 0;
+  if (!init) {
+    info = SWIG_TypeQuery("_p_char");
+    init = 1;
+  }
+  return info;
 }
 
 
-SWIGINTERNINLINE PyObject*
-  SWIG_From_int  (int value)
+SWIGINTERN int
+SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
 {
-  return PyInt_FromLong((long) value);
-}
-
-
+#if PY_VERSION_HEX>=0x03000000
+#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
+  if (PyBytes_Check(obj))
+#else
+  if (PyUnicode_Check(obj))
+#endif
+#else  
+  if (PyString_Check(obj))
+#endif
+  {
+    char *cstr; Py_ssize_t len;
+    int ret = SWIG_OK;
+#if PY_VERSION_HEX>=0x03000000
+#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
+    if (!alloc && cptr) {
+        /* We can't allow converting without allocation, since the internal
+           representation of string in Python 3 is UCS-2/UCS-4 but we require
+           a UTF-8 representation.
+           TODO(bhy) More detailed explanation */
+        return SWIG_RuntimeError;
+    }
+    obj = PyUnicode_AsUTF8String(obj);
+    if (!obj)
+      return SWIG_TypeError;
+    if (alloc)
+      *alloc = SWIG_NEWOBJ;
+#endif
+    if (PyBytes_AsStringAndSize(obj, &cstr, &len) == -1)
+      return SWIG_TypeError;
+#else
+    if (PyString_AsStringAndSize(obj, &cstr, &len) == -1)
+      return SWIG_TypeError;
+#endif
+    if (cptr) {
+      if (alloc) {
+       if (*alloc == SWIG_NEWOBJ) {
+         *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1)));
+         *alloc = SWIG_NEWOBJ;
+       } else {
+         *cptr = cstr;
+         *alloc = SWIG_OLDOBJ;
+       }
+      } else {
+#if PY_VERSION_HEX>=0x03000000
+#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
+       *cptr = PyBytes_AsString(obj);
+#else
+       assert(0); /* Should never reach here with Unicode strings in Python 3 */
+#endif
+#else
+       *cptr = SWIG_Python_str_AsChar(obj);
+        if (!*cptr)
+          ret = SWIG_TypeError;
+#endif
+      }
+    }
+    if (psize) *psize = len + 1;
+#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
+    Py_XDECREF(obj);
+#endif
+    return ret;
+  } else {
+#if defined(SWIG_PYTHON_2_UNICODE)
+#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
+#error "Cannot use both SWIG_PYTHON_2_UNICODE and SWIG_PYTHON_STRICT_BYTE_CHAR at once"
+#endif
+#if PY_VERSION_HEX<0x03000000
+    if (PyUnicode_Check(obj)) {
+      char *cstr; Py_ssize_t len;
+      if (!alloc && cptr) {
+        return SWIG_RuntimeError;
+      }
+      obj = PyUnicode_AsUTF8String(obj);
+      if (!obj)
+        return SWIG_TypeError;
+      if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
+        if (cptr) {
+          if (alloc) *alloc = SWIG_NEWOBJ;
+          *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1)));
+        }
+        if (psize) *psize = len + 1;
+
+        Py_XDECREF(obj);
+        return SWIG_OK;
+      } else {
+        Py_XDECREF(obj);
+      }
+    }
+#endif
+#endif
+
+    swig_type_info* pchar_descriptor = SWIG_pchar_descriptor();
+    if (pchar_descriptor) {
+      void* vptr = 0;
+      if (SWIG_ConvertPtr(obj, &vptr, pchar_descriptor, 0) == SWIG_OK) {
+       if (cptr) *cptr = (char *) vptr;
+       if (psize) *psize = vptr ? (strlen((char *)vptr) + 1) : 0;
+       if (alloc) *alloc = SWIG_OLDOBJ;
+       return SWIG_OK;
+      }
+    }
+  }
+  return SWIG_TypeError;
+}
+
+
+
+
+
+/* Getting isfinite working pre C99 across multiple platforms is non-trivial. Users can provide SWIG_isfinite on older platforms. */
+#ifndef SWIG_isfinite
+/* isfinite() is a macro for C99 */
+# if defined(isfinite)
+#  define SWIG_isfinite(X) (isfinite(X))
+# elif defined(__cplusplus) && __cplusplus >= 201103L
+/* Use a template so that this works whether isfinite() is std::isfinite() or
+ * in the global namespace.  The reality seems to vary between compiler
+ * versions.
+ *
+ * Make sure namespace std exists to avoid compiler warnings.
+ *
+ * extern "C++" is required as this fragment can end up inside an extern "C" { } block
+ */
+namespace std { }
+extern "C++" template<typename T>
+inline int SWIG_isfinite_func(T x) {
+  using namespace std;
+  return isfinite(x);
+}
+#  define SWIG_isfinite(X) (SWIG_isfinite_func(X))
+# elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2))
+#  define SWIG_isfinite(X) (__builtin_isfinite(X))
+# elif defined(__clang__) && defined(__has_builtin)
+#  if __has_builtin(__builtin_isfinite)
+#   define SWIG_isfinite(X) (__builtin_isfinite(X))
+#  endif
+# elif defined(_MSC_VER)
+#  define SWIG_isfinite(X) (_finite(X))
+# elif defined(__sun) && defined(__SVR4)
+#  include <ieeefp.h>
+#  define SWIG_isfinite(X) (finite(X))
+# endif
+#endif
+
+
+/* Accept infinite as a valid float value unless we are unable to check if a value is finite */
+#ifdef SWIG_isfinite
+# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX) && SWIG_isfinite(X))
+#else
+# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX))
+#endif
+
+
+SWIGINTERN int
+SWIG_AsVal_float (PyObject * obj, float *val)
+{
+  double v;
+  int res = SWIG_AsVal_double (obj, &v);
+  if (SWIG_IsOK(res)) {
+    if (SWIG_Float_Overflow_Check(v)) {
+      return SWIG_OverflowError;
+    } else {
+      if (val) *val = static_cast< float >(v);
+    }
+  }  
+  return res;
+}
+
+
+SWIGINTERNINLINE PyObject*
+  SWIG_From_int  (int value)
+{
+  return PyInt_FromLong((long) value);
+}
+
+
 SWIGINTERNINLINE PyObject*
   SWIG_From_bool  (bool value)
 {
@@ -3436,6 +3500,20 @@ SWIGINTERNINLINE PyObject*
 SWIGINTERN sentencepiece::util::Status sentencepiece_SentencePieceProcessor_LoadFromFile(sentencepiece::SentencePieceProcessor *self,absl::string_view arg){
     return self->Load(arg);
   }
+
+SWIGINTERN int
+SWIG_AsVal_bool (PyObject *obj, bool *val)
+{
+  int r;
+  if (!PyBool_Check(obj))
+    return SWIG_ERROR;
+  r = PyObject_IsTrue(obj);
+  if (r == -1)
+    return SWIG_ERROR;
+  if (val) *val = r ? true : false;
+  return SWIG_OK;
+}
+
 SWIGINTERN std::vector< int > sentencepiece_SentencePieceProcessor__EncodeAsIds(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
     auto ids = enable_sampling ?
                self->SampleEncodeAsIds(text, nbest_size, alpha) :
@@ -3457,6 +3535,13 @@ SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__Enco
     RewriteIds(*self, &proto, add_bos, add_eos, reverse, emit_unk_piece);
     return proto;
   }
+SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_SentencePieceProcessor__EncodeAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
+    auto proto = enable_sampling ?
+                 self->SampleEncodeAsImmutableProto(text, nbest_size, alpha) :
+                 self->EncodeAsImmutableProto(text);
+    RewriteIds(*self, &proto, add_bos, add_eos, reverse, emit_unk_piece);
+    return proto;
+  }
 SWIGINTERN std::vector< std::vector< int > > sentencepiece_SentencePieceProcessor__EncodeAsIdsBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &ins,int num_threads,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
     DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsIds,
                                   absl::string_view, std::vector<int>);
@@ -3470,6 +3555,11 @@ SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__EncodeAsSerializedPr
                                   absl::string_view,
                                   sentencepiece::util::bytes);
   }
+SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sentencepiece_SentencePieceProcessor__EncodeAsImmutableProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &ins,int num_threads,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
+    DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsImmutableProto,
+                                  absl::string_view,
+                                  sentencepiece::ImmutableSentencePieceText);
+  }
 SWIGINTERN std::string sentencepiece_SentencePieceProcessor__DecodeIds(sentencepiece::SentencePieceProcessor const *self,std::vector< int > const &ids){
     CheckIds(ids, self->GetPieceSize());
     return self->DecodeIds(ids);
@@ -3485,6 +3575,14 @@ SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__Deco
     CheckIds(pieces, self->GetPieceSize());
     return self->DecodePiecesAsSerializedProto(pieces);
   }
+SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_SentencePieceProcessor__DecodeIdsAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,std::vector< int > const &ids){
+    CheckIds(ids, self->GetPieceSize());
+    return self->DecodeIdsAsImmutableProto(ids);
+  }
+SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &pieces){
+    CheckIds(pieces, self->GetPieceSize());
+    return self->DecodePiecesAsImmutableProto(pieces);
+  }
 SWIGINTERN std::vector< std::string > sentencepiece_SentencePieceProcessor__DecodeIdsBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< int > > const &ins,int num_threads){
     DEFINE_DECODE_BATCH_FUNC_IMPL(DecodeIds, int, std::string);
   }
@@ -3499,6 +3597,10 @@ SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__DecodePiecesAsSerial
     DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePiecesAsSerializedProto, std::string,
                                   sentencepiece::util::bytes);
   }
+SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< absl::string_view > > const &ins,int num_threads){
+    DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePiecesAsImmutableProto, std::string,
+                                  sentencepiece::ImmutableSentencePieceText);
+  }
 SWIGINTERN std::vector< std::vector< int > > sentencepiece_SentencePieceProcessor__NBestEncodeAsIds(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int nbest_size,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
     auto idss = self->NBestEncodeAsIds(text, nbest_size);
     for (auto &ids : idss) {
@@ -3518,26 +3620,43 @@ SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__NBes
                add_bos, add_eos, reverse, emit_unk_piece);
     return self->NBestEncodeAsSerializedProto(text, nbest_size);
   }
-SWIGINTERN std::vector< std::pair< std::vector< int >,float > > sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsIds(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float theta,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
+SWIGINTERN sentencepiece::ImmutableNBestSentencePieceText sentencepiece_SentencePieceProcessor__NBestEncodeAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int nbest_size,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
+    RewriteIds(*self, static_cast<sentencepiece::ImmutableSentencePieceText *>(nullptr),
+               add_bos, add_eos, reverse, emit_unk_piece);
+    return self->NBestEncodeAsImmutableProto(text, nbest_size);
+  }
+SWIGINTERN std::vector< std::pair< std::vector< int >,float > > sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsIds(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
     auto idss = self->SampleEncodeAndScoreAsIds(text, num_samples,
-                                                 theta, wor, include_best);
+                                                 alpha, wor, include_best);
     for (auto &ids : idss) {
       RewriteIds(*self, &ids.first, add_bos, add_eos, reverse, emit_unk_piece);
     }
     return idss;
   }
-SWIGINTERN std::vector< std::pair< std::vector< std::string >,float > > sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsPieces(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float theta,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
+SWIGINTERN std::vector< std::pair< std::vector< std::string >,float > > sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsPieces(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
     auto piecess = self->SampleEncodeAndScoreAsPieces(text, num_samples,
-                                                       theta, wor, include_best);
+                                                       alpha, wor, include_best);
     for (auto &pieces : piecess) {
       RewriteIds(*self, &pieces.first, add_bos, add_eos, reverse, emit_unk_piece);
     }
     return piecess;
   }
-SWIGINTERN float sentencepiece_SentencePieceProcessor__CalculateEntropy(sentencepiece::SentencePieceProcessor *self,absl::string_view text,float theta){
-    return self->CalculateEntropy(text, theta);
+SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
+    RewriteIds(*self, static_cast<sentencepiece::util::bytes *>(nullptr),
+               add_bos, add_eos, reverse, emit_unk_piece);
+    return self->SampleEncodeAndScoreAsSerializedProto(text, num_samples,
+                                                        alpha, wor, include_best);
+  }
+SWIGINTERN sentencepiece::ImmutableNBestSentencePieceText sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){
+    RewriteIds(*self, static_cast<sentencepiece::util::bytes *>(nullptr),
+               add_bos, add_eos, reverse, emit_unk_piece);
+    return self->SampleEncodeAndScoreAsImmutableProto(text, num_samples,
+                                                       alpha, wor, include_best);
+  }
+SWIGINTERN float sentencepiece_SentencePieceProcessor__CalculateEntropy(sentencepiece::SentencePieceProcessor *self,absl::string_view text,float alpha){
+    return self->CalculateEntropy(text, alpha);
   }
-SWIGINTERN std::vector< float > sentencepiece_SentencePieceProcessor__CalculateEntropyBatch(sentencepiece::SentencePieceProcessor *self,std::vector< absl::string_view > const &ins,float theta,int num_threads){
+SWIGINTERN std::vector< float > sentencepiece_SentencePieceProcessor__CalculateEntropyBatch(sentencepiece::SentencePieceProcessor *self,std::vector< absl::string_view > const &ins,float alpha,int num_threads){
     std::vector<float> outs(ins.size());
     InitNumThreads(ins, &num_threads);
     {
@@ -3545,7 +3664,7 @@ SWIGINTERN std::vector< float > sentencepiece_SentencePieceProcessor__CalculateE
       for (int n = 0;  n < num_threads; ++n) {
         pool.Schedule([&, n]() {
             for (size_t i = n; i < ins.size(); i += num_threads) {
-              outs[i] = self->CalculateEntropy(ins[i], theta);
+              outs[i] = self->CalculateEntropy(ins[i], alpha);
           }
         });
       }
@@ -3596,56 +3715,672 @@ SWIG_AsVal_unsigned_SS_long (PyObject *obj, unsigned long *val)
       }
     }
   }
-#endif
-  return SWIG_TypeError;
+#endif
+  return SWIG_TypeError;
+}
+
+
+SWIGINTERN int
+SWIG_AsVal_unsigned_SS_int (PyObject * obj, unsigned int *val)
+{
+  unsigned long v;
+  int res = SWIG_AsVal_unsigned_SS_long (obj, &v);
+  if (SWIG_IsOK(res)) {
+    if ((v > UINT_MAX)) {
+      return SWIG_OverflowError;
+    } else {
+      if (val) *val = static_cast< unsigned int >(v);
+    }
+  }  
+  return res;
+}
+
+SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromString(absl::string_view arg){
+    const auto _status = sentencepiece::SentencePieceTrainer::Train(arg);
+    if (!_status.ok()) throw _status;
+    return;
+  }
+SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromMap(std::unordered_map< std::string,std::string > const &args){
+    const auto _status = sentencepiece::SentencePieceTrainer::Train(args);
+    if (!_status.ok()) throw _status;
+    return;
+  }
+SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromMap2(std::unordered_map< std::string,std::string > const &args,sentencepiece::SentenceIterator *iter){
+    const auto _status = sentencepiece::SentencePieceTrainer::Train(args, iter);
+    if (!_status.ok()) throw _status;
+    return;
+  }
+SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceTrainer__TrainFromMap3(std::unordered_map< std::string,std::string > const &args){
+    sentencepiece::util::bytes model_proto;
+    const auto _status = sentencepiece::SentencePieceTrainer::Train(args, nullptr, &model_proto);
+    if (!_status.ok()) throw _status;
+    return model_proto;
+  }
+SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceTrainer__TrainFromMap4(std::unordered_map< std::string,std::string > const &args,sentencepiece::SentenceIterator *iter){
+    sentencepiece::util::bytes model_proto;
+    const auto _status = sentencepiece::SentencePieceTrainer::Train(args, iter, &model_proto);
+    if (!_status.ok()) throw _status;
+    return model_proto;
+  }
+#ifdef __cplusplus
+extern "C" {
+#endif
+SWIGINTERN PyObject *_wrap_new_ImmutableSentencePieceText_ImmutableSentencePiece(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *result = 0 ;
+  
+  if (!SWIG_Python_UnpackTuple(args, "new_ImmutableSentencePieceText_ImmutableSentencePiece", 0, 0, 0)) SWIG_fail;
+  {
+    try {
+      result = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *)new sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece();
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_POINTER_NEW |  0 );
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_delete_ImmutableSentencePieceText_ImmutableSentencePiece(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_POINTER_DISOWN |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_ImmutableSentencePieceText_ImmutableSentencePiece" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1);
+  {
+    try {
+      delete arg1;
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_Py_Void();
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece_piece(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  std::string *result = 0 ;
+  
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece_piece" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1);
+  {
+    try {
+      result = (std::string *) &((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->piece();
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  {
+    PyObject *input_type = resultobj;
+    resultobj = MakePyOutputString(*result, input_type);
+  }
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece_surface(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  std::string *result = 0 ;
+  
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece_surface" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1);
+  {
+    try {
+      result = (std::string *) &((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->surface();
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  {
+    PyObject *input_type = resultobj;
+    resultobj = MakePyOutputString(*result, input_type);
+  }
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece_id(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  uint32_t result;
+  
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece_id" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1);
+  {
+    try {
+      result = ((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->id();
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_From_unsigned_SS_int(static_cast< unsigned int >(result));
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece_begin(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  uint32_t result;
+  
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece_begin" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1);
+  {
+    try {
+      result = ((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->begin();
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_From_unsigned_SS_int(static_cast< unsigned int >(result));
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece_end(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  uint32_t result;
+  
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece_end" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1);
+  {
+    try {
+      result = ((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->end();
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_From_unsigned_SS_int(static_cast< unsigned int >(result));
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *ImmutableSentencePieceText_ImmutableSentencePiece_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *obj;
+  if (!SWIG_Python_UnpackTuple(args, "swigregister", 1, 1, &obj)) return NULL;
+  SWIG_TypeNewClientData(SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_NewClientData(obj));
+  return SWIG_Py_Void();
+}
+
+SWIGINTERN PyObject *ImmutableSentencePieceText_ImmutableSentencePiece_swiginit(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  return SWIG_Python_InitShadowInstance(args);
+}
+// Python-callable constructor wrapper: takes no arguments, default-constructs an ImmutableSentencePieceText on the heap and returns it wrapped as a Python object.
+SWIGINTERN PyObject *_wrap_new_ImmutableSentencePieceText(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText *result = 0 ;
+  // The unpack call is given min = max = 0, so any supplied argument is rejected.
+  if (!SWIG_Python_UnpackTuple(args, "new_ImmutableSentencePieceText", 0, 0, 0)) SWIG_fail;
+  {
+    try {
+      result = (sentencepiece::ImmutableSentencePieceText *)new sentencepiece::ImmutableSentencePieceText();
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());  // a thrown Status is mapped through ToSwigError and reported with its message
+    }
+  }
+  resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_NEW |  0 );
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python-callable destructor wrapper for ImmutableSentencePieceText: `args` is the wrapped object itself (not a tuple).
+// The pointer is converted with SWIG_POINTER_DISOWN, the C++ object is deleted, and None is returned.
+SWIGINTERN PyObject *_wrap_delete_ImmutableSentencePieceText(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  // A null `args` means there is no object to destroy; take the error exit.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_DISOWN |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_ImmutableSentencePieceText" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1);
+  {
+    try {
+      delete arg1;
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_Py_Void();
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python wrapper for ImmutableSentencePieceText::pieces_size() const: `args` is the wrapped object itself.
+// Returns the size_t result converted with SWIG_From_size_t, or NULL on the error path.
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_pieces_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  size_t result;
+  // Convert the sole argument to the C++ object pointer before calling through it.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_pieces_size" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1);
+  {
+    try {
+      result = ((sentencepiece::ImmutableSentencePieceText const *)arg1)->pieces_size();
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_From_size_t(static_cast< size_t >(result));
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python wrapper for ImmutableSentencePieceText::text() const: `args` is the wrapped object itself.
+// The string is converted to a Python object by MakePyOutputString; NULL is returned on the error path.
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_text(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  std::string *result = 0 ;
+  // Convert the sole argument to the C++ object pointer before calling through it.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_text" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1);
+  {
+    try {
+      result = (std::string *) &((sentencepiece::ImmutableSentencePieceText const *)arg1)->text();  // keeps the address of text()'s result (const cast away) instead of copying the string
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  {
+    PyObject *input_type = resultobj;  // this function never assigns resultobj before here, so the hint is its initial 0 -- NOTE(review): confirm how MakePyOutputString treats that value
+    resultobj = MakePyOutputString(*result, input_type);
+  }
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python wrapper for ImmutableSentencePieceText::score() const: `args` is the wrapped object itself.
+// Returns the float result converted with SWIG_From_float, or NULL on the error path.
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_score(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  float result;
+  // Convert the sole argument to the C++ object pointer before calling through it.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_score" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1);
+  {
+    try {
+      result = (float)((sentencepiece::ImmutableSentencePieceText const *)arg1)->score();
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_From_float(static_cast< float >(result));
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python wrapper for ImmutableSentencePieceText::SerializeAsString() const: `args` is the wrapped object itself.
+// The sentencepiece::util::bytes result is converted with MakePyOutputBytes; NULL is returned on the error path.
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_SerializeAsString(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  sentencepiece::util::bytes result;
+  // Convert the sole argument to the C++ object pointer before calling through it.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_SerializeAsString" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1);
+  {
+    try {
+      result = ((sentencepiece::ImmutableSentencePieceText const *)arg1)->SerializeAsString();
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  {
+    resultobj = MakePyOutputBytes(result);
+  }
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python wrapper for the pieces(index) accessor of ImmutableSentencePieceText: expects exactly two arguments (wrapped object, int).
+// The piece returned by the helper is copied onto the heap and handed to Python with SWIG_POINTER_OWN; NULL is returned on the error path.
+SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_pieces(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ;
+  int arg2 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  int val2 ;
+  int ecode2 = 0 ;
+  PyObject *swig_obj[2] ;
+  sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece result;
+  // Unpack (object, index), then convert each argument in turn; any conversion failure takes the error exit.
+  if (!SWIG_Python_UnpackTuple(args, "ImmutableSentencePieceText_pieces", 2, 2, swig_obj)) SWIG_fail;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_pieces" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1);
+  ecode2 = SWIG_AsVal_int(swig_obj[1], &val2);
+  if (!SWIG_IsOK(ecode2)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "ImmutableSentencePieceText_pieces" "', argument " "2"" of type '" "int""'");
+  } 
+  arg2 = static_cast< int >(val2);
+  {
+    try {
+      result = sentencepiece_ImmutableSentencePieceText_pieces((sentencepiece::ImmutableSentencePieceText const *)arg1,arg2);  // NOTE(review): helper is defined elsewhere; any index validation happens there, not in this wrapper
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece(static_cast< const sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece& >(result))), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_POINTER_OWN |  0 );
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Module-level "swigregister" hook for ImmutableSentencePieceText: attaches client data built from the single
+// Python object passed in to the SWIG type descriptor for that class, then returns None.
+SWIGINTERN PyObject *ImmutableSentencePieceText_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *obj;
+  if (!SWIG_Python_UnpackTuple(args, "swigregister", 1, 1, &obj)) return NULL;  // exactly one argument is required
+  SWIG_TypeNewClientData(SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_NewClientData(obj));
+  return SWIG_Py_Void();
+}
+// "swiginit" hook for ImmutableSentencePieceText: forwards the argument tuple unchanged to SWIG_Python_InitShadowInstance.
+SWIGINTERN PyObject *ImmutableSentencePieceText_swiginit(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  return SWIG_Python_InitShadowInstance(args);
+}
+// Python-callable constructor wrapper: takes no arguments, default-constructs an ImmutableNBestSentencePieceText on the heap and returns it wrapped as a Python object.
+SWIGINTERN PyObject *_wrap_new_ImmutableNBestSentencePieceText(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableNBestSentencePieceText *result = 0 ;
+  // The unpack call is given min = max = 0, so any supplied argument is rejected.
+  if (!SWIG_Python_UnpackTuple(args, "new_ImmutableNBestSentencePieceText", 0, 0, 0)) SWIG_fail;
+  {
+    try {
+      result = (sentencepiece::ImmutableNBestSentencePieceText *)new sentencepiece::ImmutableNBestSentencePieceText();
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());  // a thrown Status is mapped through ToSwigError and reported with its message
+    }
+  }
+  resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_NEW |  0 );
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python-callable destructor wrapper for ImmutableNBestSentencePieceText: `args` is the wrapped object itself (not a tuple).
+// The pointer is converted with SWIG_POINTER_DISOWN, the C++ object is deleted, and None is returned.
+SWIGINTERN PyObject *_wrap_delete_ImmutableNBestSentencePieceText(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  // A null `args` means there is no object to destroy; take the error exit.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_DISOWN |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_ImmutableNBestSentencePieceText" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableNBestSentencePieceText * >(argp1);
+  {
+    try {
+      delete arg1;
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_Py_Void();
+  return resultobj;
+fail:
+  return NULL;  // error exit
 }
 
 
-SWIGINTERN int
-SWIG_AsVal_unsigned_SS_int (PyObject * obj, unsigned int *val)
-{
-  unsigned long v;
-  int res = SWIG_AsVal_unsigned_SS_long (obj, &v);
-  if (SWIG_IsOK(res)) {
-    if ((v > UINT_MAX)) {
-      return SWIG_OverflowError;
-    } else {
-      if (val) *val = static_cast< unsigned int >(v);
+SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText_nbests_size(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  size_t result;
+  // Python wrapper for ImmutableNBestSentencePieceText::nbests_size() const: `args` is the wrapped object itself; the size_t result is converted with SWIG_From_size_t.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableNBestSentencePieceText_nbests_size" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::ImmutableNBestSentencePieceText * >(argp1);
+  {
+    try {
+      result = ((sentencepiece::ImmutableNBestSentencePieceText const *)arg1)->nbests_size();
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
     }
-  }  
-  return res;
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_From_size_t(static_cast< size_t >(result));
+  return resultobj;
+fail:
+  return NULL;  // error exit
 }
 
-SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromString(absl::string_view arg){
-    const auto _status = sentencepiece::SentencePieceTrainer::Train(arg);
-    if (!_status.ok()) throw _status;
-    return;
+// Python wrapper for ImmutableNBestSentencePieceText::SerializeAsString() const: `args` is the wrapped object itself; the bytes result is converted with MakePyOutputBytes.
+SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText_SerializeAsString(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[1] ;
+  sentencepiece::util::bytes result;
+  // Convert the sole argument to the C++ object pointer before calling through it.
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableNBestSentencePieceText_SerializeAsString" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText const *""'"); 
   }
-SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromMap(std::unordered_map< std::string,std::string > const &args){
-    const auto _status = sentencepiece::SentencePieceTrainer::Train(args);
-    if (!_status.ok()) throw _status;
-    return;
+  arg1 = reinterpret_cast< sentencepiece::ImmutableNBestSentencePieceText * >(argp1);
+  {
+    try {
+      result = ((sentencepiece::ImmutableNBestSentencePieceText const *)arg1)->SerializeAsString();
+      ReleaseResultObject(resultobj);  // NOTE(review): helper is defined elsewhere; resultobj still holds its initial 0 here
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
   }
-SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromMap2(std::unordered_map< std::string,std::string > const &args,sentencepiece::SentenceIterator *iter){
-    const auto _status = sentencepiece::SentencePieceTrainer::Train(args, iter);
-    if (!_status.ok()) throw _status;
-    return;
+  {
+    resultobj = MakePyOutputBytes(result);
   }
-SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceTrainer__TrainFromMap3(std::unordered_map< std::string,std::string > const &args){
-    sentencepiece::util::bytes model_proto;
-    const auto _status = sentencepiece::SentencePieceTrainer::Train(args, nullptr, &model_proto);
-    if (!_status.ok()) throw _status;
-    return model_proto;
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Python wrapper for the nbests(index) accessor of ImmutableNBestSentencePieceText: expects exactly two arguments (wrapped object, int).
+// The ImmutableSentencePieceText returned by the helper is copied onto the heap and handed to Python with SWIG_POINTER_OWN.
+SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText_nbests(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ;
+  int arg2 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  int val2 ;
+  int ecode2 = 0 ;
+  PyObject *swig_obj[2] ;
+  sentencepiece::ImmutableSentencePieceText result;
+  // Unpack (object, index), then convert each argument in turn; any conversion failure takes the error exit.
+  if (!SWIG_Python_UnpackTuple(args, "ImmutableNBestSentencePieceText_nbests", 2, 2, swig_obj)) SWIG_fail;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableNBestSentencePieceText_nbests" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText const *""'"); 
   }
-SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceTrainer__TrainFromMap4(std::unordered_map< std::string,std::string > const &args,sentencepiece::SentenceIterator *iter){
-    sentencepiece::util::bytes model_proto;
-    const auto _status = sentencepiece::SentencePieceTrainer::Train(args, iter, &model_proto);
-    if (!_status.ok()) throw _status;
-    return model_proto;
+  arg1 = reinterpret_cast< sentencepiece::ImmutableNBestSentencePieceText * >(argp1);
+  ecode2 = SWIG_AsVal_int(swig_obj[1], &val2);
+  if (!SWIG_IsOK(ecode2)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "ImmutableNBestSentencePieceText_nbests" "', argument " "2"" of type '" "int""'");
+  } 
+  arg2 = static_cast< int >(val2);
+  {
+    try {
+      result = sentencepiece_ImmutableNBestSentencePieceText_nbests((sentencepiece::ImmutableNBestSentencePieceText const *)arg1,arg2);  // NOTE(review): helper is defined elsewhere; any index validation happens there, not in this wrapper
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
   }
-#ifdef __cplusplus
-extern "C" {
-#endif
+  resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(static_cast< const sentencepiece::ImmutableSentencePieceText& >(result))), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN |  0 );
+  return resultobj;
+fail:
+  return NULL;  // error exit
+}
+// Module-level "swigregister" hook for ImmutableNBestSentencePieceText: attaches client data built from the single
+// Python object passed in to the SWIG type descriptor for that class, then returns None.
+SWIGINTERN PyObject *ImmutableNBestSentencePieceText_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *obj;
+  if (!SWIG_Python_UnpackTuple(args, "swigregister", 1, 1, &obj)) return NULL;  // exactly one argument is required
+  SWIG_TypeNewClientData(SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_NewClientData(obj));
+  return SWIG_Py_Void();
+}
+// "swiginit" hook for ImmutableNBestSentencePieceText: forwards the argument tuple unchanged to SWIG_Python_InitShadowInstance.
+SWIGINTERN PyObject *ImmutableNBestSentencePieceText_swiginit(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  return SWIG_Python_InitShadowInstance(args);
+}
+
 SWIGINTERN PyObject *_wrap_new_SentencePieceProcessor(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   sentencepiece::SentencePieceProcessor *result = 0 ;
@@ -3992,165 +4727,16 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_CalculateEntropy__SWIG_0(PyObj
   float *arg4 = (float *) 0 ;
   void *argp1 = 0 ;
   int res1 = 0 ;
-  float val3 ;
-  int ecode3 = 0 ;
-  void *argp4 = 0 ;
-  int res4 = 0 ;
-  sentencepiece::util::Status result;
-  
-  if ((nobjs < 4) || (nobjs > 4)) SWIG_fail;
-  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
-  if (!SWIG_IsOK(res1)) {
-    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
-  }
-  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
-  {
-    const PyInputString ustring(swig_obj[1]);
-    if (!ustring.IsAvalable()) {
-      PyErr_SetString(PyExc_TypeError, "not a string");
-      SWIG_fail;
-    }
-    resultobj = ustring.input_type();
-    arg2 = ustring.str();
-  }
-  ecode3 = SWIG_AsVal_float(swig_obj[2], &val3);
-  if (!SWIG_IsOK(ecode3)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "3"" of type '" "float""'");
-  } 
-  arg3 = static_cast< float >(val3);
-  res4 = SWIG_ConvertPtr(swig_obj[3], &argp4,SWIGTYPE_p_float, 0 |  0 );
-  if (!SWIG_IsOK(res4)) {
-    SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "4"" of type '" "float *""'"); 
-  }
-  arg4 = reinterpret_cast< float * >(argp4);
-  {
-    try {
-      result = ((sentencepiece::SentencePieceProcessor const *)arg1)->CalculateEntropy(arg2,arg3,arg4);
-      ReleaseResultObject(resultobj);
-    }
-    catch (const sentencepiece::util::Status &status) {
-      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
-    }
-  }
-  {
-    if (!(&result)->ok()) {
-      SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str());
-    }
-    resultobj = SWIG_From_bool((&result)->ok());
-  }
-  return resultobj;
-fail:
-  return NULL;
-}
-
-
-SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SampleEncodeAndScoreAsPieces(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
-  PyObject *resultobj = 0;
-  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
-  absl::string_view arg2 ;
-  int arg3 ;
-  float arg4 ;
-  bool arg5 ;
-  bool arg6 ;
-  void *argp1 = 0 ;
-  int res1 = 0 ;
-  int val3 ;
-  int ecode3 = 0 ;
-  float val4 ;
-  int ecode4 = 0 ;
-  bool val5 ;
-  int ecode5 = 0 ;
-  bool val6 ;
-  int ecode6 = 0 ;
-  PyObject *swig_obj[6] ;
-  std::vector< std::pair< std::vector< std::string >,float > > result;
-  
-  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SampleEncodeAndScoreAsPieces", 6, 6, swig_obj)) SWIG_fail;
-  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
-  if (!SWIG_IsOK(res1)) {
-    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsPieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
-  }
-  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
-  {
-    const PyInputString ustring(swig_obj[1]);
-    if (!ustring.IsAvalable()) {
-      PyErr_SetString(PyExc_TypeError, "not a string");
-      SWIG_fail;
-    }
-    resultobj = ustring.input_type();
-    arg2 = ustring.str();
-  }
-  ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);
-  if (!SWIG_IsOK(ecode3)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsPieces" "', argument " "3"" of type '" "int""'");
-  } 
-  arg3 = static_cast< int >(val3);
-  ecode4 = SWIG_AsVal_float(swig_obj[3], &val4);
-  if (!SWIG_IsOK(ecode4)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsPieces" "', argument " "4"" of type '" "float""'");
-  } 
-  arg4 = static_cast< float >(val4);
-  ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5);
-  if (!SWIG_IsOK(ecode5)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsPieces" "', argument " "5"" of type '" "bool""'");
-  } 
-  arg5 = static_cast< bool >(val5);
-  ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6);
-  if (!SWIG_IsOK(ecode6)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsPieces" "', argument " "6"" of type '" "bool""'");
-  } 
-  arg6 = static_cast< bool >(val6);
-  {
-    try {
-      result = ((sentencepiece::SentencePieceProcessor const *)arg1)->SampleEncodeAndScoreAsPieces(arg2,arg3,arg4,arg5,arg6);
-      ReleaseResultObject(resultobj);
-    }
-    catch (const sentencepiece::util::Status &status) {
-      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
-    }
-  }
-  {
-    PyObject *input_type = resultobj;
-    resultobj = PyList_New((&result)->size());
-    for (size_t i = 0; i < (&result)->size(); ++i) {
-      PyObject *obj = PyList_New(result[i].first.size());
-      for (size_t j = 0; j < result[i].first.size(); ++j) {
-        PyList_SET_ITEM(obj, j, MakePyOutputString(result[i].first[j], input_type));
-      }
-      PyList_SET_ITEM(resultobj, i, PyTuple_Pack(2, obj, PyFloat_FromDouble(static_cast<double>(result[i].second))));
-    }
-  }
-  return resultobj;
-fail:
-  return NULL;
-}
-
-
-SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SampleEncodeAndScoreAsIds(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
-  PyObject *resultobj = 0;
-  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
-  absl::string_view arg2 ;
-  int arg3 ;
-  float arg4 ;
-  bool arg5 ;
-  bool arg6 ;
-  void *argp1 = 0 ;
-  int res1 = 0 ;
-  int val3 ;
+  float val3 ;
   int ecode3 = 0 ;
-  float val4 ;
-  int ecode4 = 0 ;
-  bool val5 ;
-  int ecode5 = 0 ;
-  bool val6 ;
-  int ecode6 = 0 ;
-  PyObject *swig_obj[6] ;
-  std::vector< std::pair< std::vector< int >,float > > result;
+  void *argp4 = 0 ;
+  int res4 = 0 ;
+  sentencepiece::util::Status result;
   
-  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SampleEncodeAndScoreAsIds", 6, 6, swig_obj)) SWIG_fail;
+  if ((nobjs < 4) || (nobjs > 4)) SWIG_fail;
   res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
   if (!SWIG_IsOK(res1)) {
-    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
   }
   arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
   {
@@ -4162,29 +4748,19 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SampleEncodeAndScoreAsIds(PyOb
     resultobj = ustring.input_type();
     arg2 = ustring.str();
   }
-  ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);
+  ecode3 = SWIG_AsVal_float(swig_obj[2], &val3);
   if (!SWIG_IsOK(ecode3)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsIds" "', argument " "3"" of type '" "int""'");
-  } 
-  arg3 = static_cast< int >(val3);
-  ecode4 = SWIG_AsVal_float(swig_obj[3], &val4);
-  if (!SWIG_IsOK(ecode4)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsIds" "', argument " "4"" of type '" "float""'");
-  } 
-  arg4 = static_cast< float >(val4);
-  ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5);
-  if (!SWIG_IsOK(ecode5)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsIds" "', argument " "5"" of type '" "bool""'");
-  } 
-  arg5 = static_cast< bool >(val5);
-  ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6);
-  if (!SWIG_IsOK(ecode6)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor_SampleEncodeAndScoreAsIds" "', argument " "6"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "3"" of type '" "float""'");
   } 
-  arg6 = static_cast< bool >(val6);
+  arg3 = static_cast< float >(val3);
+  res4 = SWIG_ConvertPtr(swig_obj[3], &argp4,SWIGTYPE_p_float, 0 |  0 );
+  if (!SWIG_IsOK(res4)) {
+    SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "4"" of type '" "float *""'"); 
+  }
+  arg4 = reinterpret_cast< float * >(argp4);
   {
     try {
-      result = ((sentencepiece::SentencePieceProcessor const *)arg1)->SampleEncodeAndScoreAsIds(arg2,arg3,arg4,arg5,arg6);
+      result = ((sentencepiece::SentencePieceProcessor const *)arg1)->CalculateEntropy(arg2,arg3,arg4);
       ReleaseResultObject(resultobj);
     }
     catch (const sentencepiece::util::Status &status) {
@@ -4192,14 +4768,10 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SampleEncodeAndScoreAsIds(PyOb
     }
   }
   {
-    resultobj = PyList_New((&result)->size());
-    for (size_t i = 0; i < (&result)->size(); ++i) {
-      PyObject *obj = PyList_New(result[i].first.size());
-      for (size_t j = 0; j < result[i].first.size(); ++j) {
-        PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast<long>(result[i].first[j])));
-      }
-      PyList_SET_ITEM(resultobj, i, PyTuple_Pack(2, obj, PyFloat_FromDouble(static_cast<double>(result[i].second))));
+    if (!(&result)->ok()) {
+      SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str());
     }
+    resultobj = SWIG_From_bool((&result)->ok());
   }
   return resultobj;
 fail:
@@ -5112,15 +5684,242 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProto(PyObj
     }
   }
   {
-    resultobj = MakePyOutputBytes(result);
+    resultobj = MakePyOutputBytes(result);
+  }
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsImmutableProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  absl::string_view arg2 ;
+  bool arg3 ;
+  int arg4 ;
+  float arg5 ;
+  bool arg6 ;
+  bool arg7 ;
+  bool arg8 ;
+  bool arg9 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  bool val3 ;
+  int ecode3 = 0 ;
+  int val4 ;
+  int ecode4 = 0 ;
+  float val5 ;
+  int ecode5 = 0 ;
+  bool val6 ;
+  int ecode6 = 0 ;
+  bool val7 ;
+  int ecode7 = 0 ;
+  bool val8 ;
+  int ecode8 = 0 ;
+  bool val9 ;
+  int ecode9 = 0 ;
+  PyObject *swig_obj[9] ;
+  sentencepiece::ImmutableSentencePieceText result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsImmutableProto", 9, 9, swig_obj)) SWIG_fail;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {
+    const PyInputString ustring(swig_obj[1]);
+    if (!ustring.IsAvalable()) {
+      PyErr_SetString(PyExc_TypeError, "not a string");
+      SWIG_fail;
+    }
+    resultobj = ustring.input_type();
+    arg2 = ustring.str();
+  }
+  ecode3 = SWIG_AsVal_bool(swig_obj[2], &val3);
+  if (!SWIG_IsOK(ecode3)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "3"" of type '" "bool""'");
+  } 
+  arg3 = static_cast< bool >(val3);
+  ecode4 = SWIG_AsVal_int(swig_obj[3], &val4);
+  if (!SWIG_IsOK(ecode4)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "4"" of type '" "int""'");
+  } 
+  arg4 = static_cast< int >(val4);
+  ecode5 = SWIG_AsVal_float(swig_obj[4], &val5);
+  if (!SWIG_IsOK(ecode5)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "5"" of type '" "float""'");
+  } 
+  arg5 = static_cast< float >(val5);
+  ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6);
+  if (!SWIG_IsOK(ecode6)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "6"" of type '" "bool""'");
+  } 
+  arg6 = static_cast< bool >(val6);
+  ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
+  if (!SWIG_IsOK(ecode7)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "7"" of type '" "bool""'");
+  } 
+  arg7 = static_cast< bool >(val7);
+  ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8);
+  if (!SWIG_IsOK(ecode8)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "8"" of type '" "bool""'");
+  } 
+  arg8 = static_cast< bool >(val8);
+  ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9);
+  if (!SWIG_IsOK(ecode9)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "9"" of type '" "bool""'");
+  } 
+  arg9 = static_cast< bool >(val9);
+  {
+    try {
+      result = sentencepiece_SentencePieceProcessor__EncodeAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9);
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(static_cast< const sentencepiece::ImmutableSentencePieceText& >(result))), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN |  0 );
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIdsBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  std::vector< absl::string_view > *arg2 = 0 ;
+  int arg3 ;
+  bool arg4 ;
+  int arg5 ;
+  float arg6 ;
+  bool arg7 ;
+  bool arg8 ;
+  bool arg9 ;
+  bool arg10 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  int val3 ;
+  int ecode3 = 0 ;
+  bool val4 ;
+  int ecode4 = 0 ;
+  int val5 ;
+  int ecode5 = 0 ;
+  float val6 ;
+  int ecode6 = 0 ;
+  bool val7 ;
+  int ecode7 = 0 ;
+  bool val8 ;
+  int ecode8 = 0 ;
+  bool val9 ;
+  int ecode9 = 0 ;
+  bool val10 ;
+  int ecode10 = 0 ;
+  PyObject *swig_obj[10] ;
+  std::vector< std::vector< int > > result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsIdsBatch", 10, 10, swig_obj)) SWIG_fail;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {
+    std::vector<absl::string_view> *out = nullptr;
+    if (PyList_Check(swig_obj[1])) {
+      const size_t size = PyList_Size(swig_obj[1]);
+      out = new std::vector<absl::string_view>(size);
+      for (size_t i = 0; i < size; ++i) {
+        const PyInputString ustring(PyList_GetItem(swig_obj[1], i));
+        if (ustring.IsAvalable()) {
+          (*out)[i] = ustring.str();
+        } else {
+          PyErr_SetString(PyExc_TypeError, "list must contain strings");
+          SWIG_fail;
+        }
+        resultobj = ustring.input_type();
+      }
+    } else {
+      PyErr_SetString(PyExc_TypeError, "not a list");
+      SWIG_fail;
+    }
+    arg2 = out;
+  }
+  ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);
+  if (!SWIG_IsOK(ecode3)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "3"" of type '" "int""'");
+  } 
+  arg3 = static_cast< int >(val3);
+  ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4);
+  if (!SWIG_IsOK(ecode4)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "4"" of type '" "bool""'");
+  } 
+  arg4 = static_cast< bool >(val4);
+  ecode5 = SWIG_AsVal_int(swig_obj[4], &val5);
+  if (!SWIG_IsOK(ecode5)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "5"" of type '" "int""'");
+  } 
+  arg5 = static_cast< int >(val5);
+  ecode6 = SWIG_AsVal_float(swig_obj[5], &val6);
+  if (!SWIG_IsOK(ecode6)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "6"" of type '" "float""'");
+  } 
+  arg6 = static_cast< float >(val6);
+  ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
+  if (!SWIG_IsOK(ecode7)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "7"" of type '" "bool""'");
+  } 
+  arg7 = static_cast< bool >(val7);
+  ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8);
+  if (!SWIG_IsOK(ecode8)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "8"" of type '" "bool""'");
+  } 
+  arg8 = static_cast< bool >(val8);
+  ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9);
+  if (!SWIG_IsOK(ecode9)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "9"" of type '" "bool""'");
+  } 
+  arg9 = static_cast< bool >(val9);
+  ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10);
+  if (!SWIG_IsOK(ecode10)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "10"" of type '" "bool""'");
+  } 
+  arg10 = static_cast< bool >(val10);
+  {
+    try {
+      result = sentencepiece_SentencePieceProcessor__EncodeAsIdsBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  {
+    resultobj = PyList_New((&result)->size());
+    for (size_t i = 0; i < (&result)->size(); ++i) {
+      PyObject *obj = PyList_New(result[i].size());
+      for (size_t j = 0; j < result[i].size(); ++j) {
+        PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast<long>(result[i][j])));
+      }
+      PyList_SET_ITEM(resultobj, i, obj);
+    }
+  }
+  {
+    delete arg2;
   }
   return resultobj;
 fail:
+  {
+    delete arg2;
+  }
   return NULL;
 }
 
 
-SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIdsBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPiecesBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
   std::vector< absl::string_view > *arg2 = 0 ;
@@ -5151,12 +5950,12 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIdsBatch(PyObject *SW
   bool val10 ;
   int ecode10 = 0 ;
   PyObject *swig_obj[10] ;
-  std::vector< std::vector< int > > result;
+  std::vector< std::vector< std::string > > result;
   
-  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsIdsBatch", 10, 10, swig_obj)) SWIG_fail;
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsPiecesBatch", 10, 10, swig_obj)) SWIG_fail;
   res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
   if (!SWIG_IsOK(res1)) {
-    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
   }
   arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
   {
@@ -5182,47 +5981,47 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIdsBatch(PyObject *SW
   }
   ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);
   if (!SWIG_IsOK(ecode3)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "3"" of type '" "int""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "3"" of type '" "int""'");
   } 
   arg3 = static_cast< int >(val3);
   ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4);
   if (!SWIG_IsOK(ecode4)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "4"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "4"" of type '" "bool""'");
   } 
   arg4 = static_cast< bool >(val4);
   ecode5 = SWIG_AsVal_int(swig_obj[4], &val5);
   if (!SWIG_IsOK(ecode5)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "5"" of type '" "int""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "5"" of type '" "int""'");
   } 
   arg5 = static_cast< int >(val5);
   ecode6 = SWIG_AsVal_float(swig_obj[5], &val6);
   if (!SWIG_IsOK(ecode6)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "6"" of type '" "float""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "6"" of type '" "float""'");
   } 
   arg6 = static_cast< float >(val6);
   ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
   if (!SWIG_IsOK(ecode7)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "7"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "7"" of type '" "bool""'");
   } 
   arg7 = static_cast< bool >(val7);
   ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8);
   if (!SWIG_IsOK(ecode8)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "8"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "8"" of type '" "bool""'");
   } 
   arg8 = static_cast< bool >(val8);
   ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9);
   if (!SWIG_IsOK(ecode9)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "9"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "9"" of type '" "bool""'");
   } 
   arg9 = static_cast< bool >(val9);
   ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10);
   if (!SWIG_IsOK(ecode10)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "10"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "10"" of type '" "bool""'");
   } 
   arg10 = static_cast< bool >(val10);
   {
     try {
-      result = sentencepiece_SentencePieceProcessor__EncodeAsIdsBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
+      result = sentencepiece_SentencePieceProcessor__EncodeAsPiecesBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
       ReleaseResultObject(resultobj);
     }
     catch (const sentencepiece::util::Status &status) {
@@ -5230,11 +6029,12 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIdsBatch(PyObject *SW
     }
   }
   {
+    PyObject *input_type = resultobj;
     resultobj = PyList_New((&result)->size());
     for (size_t i = 0; i < (&result)->size(); ++i) {
       PyObject *obj = PyList_New(result[i].size());
       for (size_t j = 0; j < result[i].size(); ++j) {
-        PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast<long>(result[i][j])));
+        PyList_SET_ITEM(obj, j, MakePyOutputString(result[i][j], input_type));
       }
       PyList_SET_ITEM(resultobj, i, obj);
     }
@@ -5251,7 +6051,7 @@ fail:
 }
 
 
-SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPiecesBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
   std::vector< absl::string_view > *arg2 = 0 ;
@@ -5282,12 +6082,12 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPiecesBatch(PyObject
   bool val10 ;
   int ecode10 = 0 ;
   PyObject *swig_obj[10] ;
-  std::vector< std::vector< std::string > > result;
+  BytesArray result;
   
-  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsPiecesBatch", 10, 10, swig_obj)) SWIG_fail;
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsSerializedProtoBatch", 10, 10, swig_obj)) SWIG_fail;
   res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
   if (!SWIG_IsOK(res1)) {
-    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
   }
   arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
   {
@@ -5313,47 +6113,47 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPiecesBatch(PyObject
   }
   ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);
   if (!SWIG_IsOK(ecode3)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "3"" of type '" "int""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "3"" of type '" "int""'");
   } 
   arg3 = static_cast< int >(val3);
   ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4);
   if (!SWIG_IsOK(ecode4)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "4"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "4"" of type '" "bool""'");
   } 
   arg4 = static_cast< bool >(val4);
   ecode5 = SWIG_AsVal_int(swig_obj[4], &val5);
   if (!SWIG_IsOK(ecode5)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "5"" of type '" "int""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "5"" of type '" "int""'");
   } 
   arg5 = static_cast< int >(val5);
   ecode6 = SWIG_AsVal_float(swig_obj[5], &val6);
   if (!SWIG_IsOK(ecode6)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "6"" of type '" "float""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "6"" of type '" "float""'");
   } 
   arg6 = static_cast< float >(val6);
   ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
   if (!SWIG_IsOK(ecode7)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "7"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "7"" of type '" "bool""'");
   } 
   arg7 = static_cast< bool >(val7);
   ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8);
   if (!SWIG_IsOK(ecode8)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "8"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "8"" of type '" "bool""'");
   } 
   arg8 = static_cast< bool >(val8);
   ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9);
   if (!SWIG_IsOK(ecode9)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "9"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "9"" of type '" "bool""'");
   } 
   arg9 = static_cast< bool >(val9);
   ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10);
   if (!SWIG_IsOK(ecode10)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "10"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "10"" of type '" "bool""'");
   } 
   arg10 = static_cast< bool >(val10);
   {
     try {
-      result = sentencepiece_SentencePieceProcessor__EncodeAsPiecesBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
+      result = sentencepiece_SentencePieceProcessor__EncodeAsSerializedProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
       ReleaseResultObject(resultobj);
     }
     catch (const sentencepiece::util::Status &status) {
@@ -5361,14 +6161,9 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPiecesBatch(PyObject
     }
   }
   {
-    PyObject *input_type = resultobj;
     resultobj = PyList_New((&result)->size());
     for (size_t i = 0; i < (&result)->size(); ++i) {
-      PyObject *obj = PyList_New(result[i].size());
-      for (size_t j = 0; j < result[i].size(); ++j) {
-        PyList_SET_ITEM(obj, j, MakePyOutputString(result[i][j], input_type));
-      }
-      PyList_SET_ITEM(resultobj, i, obj);
+      PyList_SET_ITEM(resultobj, i, MakePyOutputBytes(result[i]));
     }
   }
   {
@@ -5383,7 +6178,7 @@ fail:
 }
 
 
-SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsImmutableProtoBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
   std::vector< absl::string_view > *arg2 = 0 ;
@@ -5414,12 +6209,12 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch(
   bool val10 ;
   int ecode10 = 0 ;
   PyObject *swig_obj[10] ;
-  BytesArray result;
+  SwigValueWrapper< std::vector< sentencepiece::ImmutableSentencePieceText > > result;
   
-  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsSerializedProtoBatch", 10, 10, swig_obj)) SWIG_fail;
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsImmutableProtoBatch", 10, 10, swig_obj)) SWIG_fail;
   res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
   if (!SWIG_IsOK(res1)) {
-    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
   }
   arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
   {
@@ -5445,47 +6240,47 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch(
   }
   ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);
   if (!SWIG_IsOK(ecode3)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "3"" of type '" "int""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "3"" of type '" "int""'");
   } 
   arg3 = static_cast< int >(val3);
   ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4);
   if (!SWIG_IsOK(ecode4)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "4"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "4"" of type '" "bool""'");
   } 
   arg4 = static_cast< bool >(val4);
   ecode5 = SWIG_AsVal_int(swig_obj[4], &val5);
   if (!SWIG_IsOK(ecode5)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "5"" of type '" "int""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "5"" of type '" "int""'");
   } 
   arg5 = static_cast< int >(val5);
   ecode6 = SWIG_AsVal_float(swig_obj[5], &val6);
   if (!SWIG_IsOK(ecode6)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "6"" of type '" "float""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "6"" of type '" "float""'");
   } 
   arg6 = static_cast< float >(val6);
   ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
   if (!SWIG_IsOK(ecode7)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "7"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "7"" of type '" "bool""'");
   } 
   arg7 = static_cast< bool >(val7);
   ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8);
   if (!SWIG_IsOK(ecode8)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "8"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "8"" of type '" "bool""'");
   } 
   arg8 = static_cast< bool >(val8);
   ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9);
   if (!SWIG_IsOK(ecode9)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "9"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "9"" of type '" "bool""'");
   } 
   arg9 = static_cast< bool >(val9);
   ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10);
   if (!SWIG_IsOK(ecode10)) {
-    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "10"" of type '" "bool""'");
+    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "10"" of type '" "bool""'");
   } 
   arg10 = static_cast< bool >(val10);
   {
     try {
-      result = sentencepiece_SentencePieceProcessor__EncodeAsSerializedProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
+      result = sentencepiece_SentencePieceProcessor__EncodeAsImmutableProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
       ReleaseResultObject(resultobj);
     }
     catch (const sentencepiece::util::Status &status) {
@@ -5495,7 +6290,8 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch(
   {
     resultobj = PyList_New((&result)->size());
     for (size_t i = 0; i < (&result)->size(); ++i) {
-      PyList_SET_ITEM(resultobj, i, MakePyOutputBytes(result[i]));
+      PyObject *obj = SWIG_NewPointerObj(new sentencepiece::ImmutableSentencePieceText((&result)->at(i)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0);
+      PyList_SET_ITEM(resultobj, i, obj);
     }
   }
   {
@@ -5750,6 +6546,121 @@ fail:
 }
 
 
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsImmutableProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  std::vector< int > *arg2 = 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[2] ;
+  sentencepiece::ImmutableSentencePieceText result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodeIdsAsImmutableProto", 2, 2, swig_obj)) SWIG_fail;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodeIdsAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {
+    std::vector<int> *out = nullptr;
+    if (PyList_Check(swig_obj[1])) {
+      const size_t size = PyList_Size(swig_obj[1]);
+      out = new std::vector<int>(size);
+      for (size_t i = 0; i < size; ++i) {
+        PyObject *o = PyList_GetItem(swig_obj[1], i);
+        if (PyInt_Check(o)) {
+          (*out)[i] = static_cast<int>(PyInt_AsLong(o));
+        } else {
+          PyErr_SetString(PyExc_TypeError,"list must contain integers");
+          SWIG_fail;
+        }
+      }
+    } else {
+      PyErr_SetString(PyExc_TypeError,"not a list");
+      SWIG_fail;
+    }
+    arg2 = out;
+  }
+  {
+    try {
+      result = sentencepiece_SentencePieceProcessor__DecodeIdsAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< int > const &)*arg2);
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(static_cast< const sentencepiece::ImmutableSentencePieceText& >(result))), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN |  0 );
+  {
+    delete arg2;
+  }
+  return resultobj;
+fail:
+  {
+    delete arg2;
+  }
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsImmutableProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  std::vector< absl::string_view > *arg2 = 0 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  PyObject *swig_obj[2] ;
+  sentencepiece::ImmutableSentencePieceText result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePiecesAsImmutableProto", 2, 2, swig_obj)) SWIG_fail;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePiecesAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {
+    std::vector<absl::string_view> *out = nullptr;
+    if (PyList_Check(swig_obj[1])) {
+      const size_t size = PyList_Size(swig_obj[1]);
+      out = new std::vector<absl::string_view>(size);
+      for (size_t i = 0; i < size; ++i) {
+        const PyInputString ustring(PyList_GetItem(swig_obj[1], i));
+        if (ustring.IsAvalable()) {
+          (*out)[i] = ustring.str();
+        } else {
+          PyErr_SetString(PyExc_TypeError, "list must contain strings");
+          SWIG_fail;
+        }
+        resultobj = ustring.input_type();
+      }
+    } else {
+      PyErr_SetString(PyExc_TypeError, "not a list");
+      SWIG_fail;
+    }
+    arg2 = out;
+  }
+  {
+    try {
+      result = sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2);
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(static_cast< const sentencepiece::ImmutableSentencePieceText& >(result))), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN |  0 );
+  {
+    delete arg2;
+  }
+  return resultobj;
+fail:
+  {
+    delete arg2;
+  }
+  return NULL;
+}
+
+
 SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
@@ -6043,7 +6954,82 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsSerializedProto
   arg3 = static_cast< int >(val3);
   {
     try {
-      result = sentencepiece_SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< absl::string_view > > const &)*arg2,arg3);
+      result = sentencepiece_SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< absl::string_view > > const &)*arg2,arg3);
+      ReleaseResultObject(resultobj);
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());
+    }
+  }
+  {
+    resultobj = PyList_New((&result)->size());
+    for (size_t i = 0; i < (&result)->size(); ++i) {
+      PyList_SET_ITEM(resultobj, i, MakePyOutputBytes(result[i]));
+    }
+  }
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  std::vector< std::vector< absl::string_view > > *arg2 = 0 ;
+  int arg3 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  int val3 ;
+  int ecode3 = 0 ;
+  PyObject *swig_obj[3] ;
+  SwigValueWrapper< std::vector< sentencepiece::ImmutableSentencePieceText > > result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch", 3, 3, swig_obj)) SWIG_fail;
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {
+    std::vector<std::vector<absl::string_view>> *out = nullptr;
+    if (PyList_Check(swig_obj[1])) {
+      const size_t size = PyList_Size(swig_obj[1]);
+      out = new std::vector<std::vector<absl::string_view>>(size);
+      for (size_t i = 0; i < size; ++i) {
+        PyObject *o = PyList_GetItem(swig_obj[1], i);
+        if (PyList_Check(o)) {
+          const size_t size2 = PyList_Size(o);
+          (*out)[i].resize(size2);
+          for (size_t j = 0; j < size2; ++j) {
+            const PyInputString ustring(PyList_GetItem(o, j));
+            if (ustring.IsAvalable()) {
+              (*out)[i][j] = ustring.str();
+            } else {
+              PyErr_SetString(PyExc_TypeError,"list must contain integers");
+              SWIG_fail;
+            }
+            resultobj = ustring.input_type();
+          }
+        } else {
+          PyErr_SetString(PyExc_TypeError,"not a list");
+          SWIG_fail;
+        }
+      }
+    } else {
+      PyErr_SetString(PyExc_TypeError,"not a list");
+      SWIG_fail;
+    }
+    arg2 = out;
+  }
+  ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);
+  if (!SWIG_IsOK(ecode3)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch" "', argument " "3"" of type '" "int""'");
+  } 
+  arg3 = static_cast< int >(val3);
+  {
+    try {
+      result = sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< absl::string_view > > const &)*arg2,arg3);
       ReleaseResultObject(resultobj);
     }
     catch (const sentencepiece::util::Status &status) {
@@ -6053,7 +7039,8 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsSerializedProto
   {
     resultobj = PyList_New((&result)->size());
     for (size_t i = 0; i < (&result)->size(); ++i) {
-      PyList_SET_ITEM(resultobj, i, MakePyOutputBytes(result[i]));
+      PyObject *obj = SWIG_NewPointerObj(new sentencepiece::ImmutableSentencePieceText((&result)->at(i)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0);
+      PyList_SET_ITEM(resultobj, i, obj);
     }
   }
   return resultobj;
@@ -6323,6 +7310,86 @@ fail:
 }
 
 
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsImmutableProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {  // SWIG wrapper: converts 7 Python arguments, calls the C++ helper and returns a wrapped ImmutableNBestSentencePieceText (NULL on failure).
+  PyObject *resultobj = 0;  // value returned to Python; also temporarily holds ustring.input_type() (see below)
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  absl::string_view arg2 ;
+  int arg3 ;
+  bool arg4 ;
+  bool arg5 ;
+  bool arg6 ;
+  bool arg7 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  int val3 ;
+  int ecode3 = 0 ;
+  bool val4 ;
+  int ecode4 = 0 ;
+  bool val5 ;
+  int ecode5 = 0 ;
+  bool val6 ;
+  int ecode6 = 0 ;
+  bool val7 ;
+  int ecode7 = 0 ;
+  PyObject *swig_obj[7] ;  // receives the unpacked positional arguments
+  sentencepiece::ImmutableNBestSentencePieceText result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__NBestEncodeAsImmutableProto", 7, 7, swig_obj)) SWIG_fail;  // exactly 7 arguments are required (min 7, max 7)
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );  // argument 1: the SentencePieceProcessor instance
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {  // argument 2: the input text, accepted only when PyInputString reports it as available
+    const PyInputString ustring(swig_obj[1]);
+    if (!ustring.IsAvalable()) {
+      PyErr_SetString(PyExc_TypeError, "not a string");
+      SWIG_fail;
+    }
+    resultobj = ustring.input_type();  // NOTE(review): resultobj is reused to carry the input type until ReleaseResultObject() runs; both helpers are defined outside this view
+    arg2 = ustring.str();  // NOTE(review): arg2 is a view obtained from ustring, which ends its scope at the next brace — confirm the viewed bytes outlive it
+  }
+  ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);  // argument 3: int
+  if (!SWIG_IsOK(ecode3)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "3"" of type '" "int""'");
+  } 
+  arg3 = static_cast< int >(val3);
+  ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4);  // arguments 4-7: bool values, each converted and checked the same way
+  if (!SWIG_IsOK(ecode4)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "4"" of type '" "bool""'");
+  } 
+  arg4 = static_cast< bool >(val4);
+  ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5);
+  if (!SWIG_IsOK(ecode5)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "5"" of type '" "bool""'");
+  } 
+  arg5 = static_cast< bool >(val5);
+  ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6);
+  if (!SWIG_IsOK(ecode6)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "6"" of type '" "bool""'");
+  } 
+  arg6 = static_cast< bool >(val6);
+  ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
+  if (!SWIG_IsOK(ecode7)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "7"" of type '" "bool""'");
+  } 
+  arg7 = static_cast< bool >(val7);
+  {
+    try {  // the helper signals failure by throwing sentencepiece::util::Status
+      result = sentencepiece_SentencePieceProcessor__NBestEncodeAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,arg2,arg3,arg4,arg5,arg6,arg7);
+      ReleaseResultObject(resultobj);  // called only when the helper did not throw
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());  // status code is mapped to a SWIG error code; the message is status.ToString()
+    }
+  }
+  resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableNBestSentencePieceText(static_cast< const sentencepiece::ImmutableNBestSentencePieceText& >(result))), SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_OWN |  0 );  // wraps a heap-allocated copy of result; created with SWIG_POINTER_OWN
+  return resultobj;
+fail:  // error exit: NULL is returned to the interpreter
+  return NULL;
+}
+
+
 SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreAsIds(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
@@ -6550,6 +7617,216 @@ fail:
 }
 
 
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {  // SWIG wrapper: converts 10 Python arguments, calls the C++ helper and returns its util::bytes result via MakePyOutputBytes (NULL on failure).
+  PyObject *resultobj = 0;  // value returned to Python; also temporarily holds ustring.input_type() (see below)
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  absl::string_view arg2 ;
+  int arg3 ;
+  float arg4 ;
+  bool arg5 ;
+  bool arg6 ;
+  bool arg7 ;
+  bool arg8 ;
+  bool arg9 ;
+  bool arg10 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  int val3 ;
+  int ecode3 = 0 ;
+  float val4 ;
+  int ecode4 = 0 ;
+  bool val5 ;
+  int ecode5 = 0 ;
+  bool val6 ;
+  int ecode6 = 0 ;
+  bool val7 ;
+  int ecode7 = 0 ;
+  bool val8 ;
+  int ecode8 = 0 ;
+  bool val9 ;
+  int ecode9 = 0 ;
+  bool val10 ;
+  int ecode10 = 0 ;
+  PyObject *swig_obj[10] ;  // receives the unpacked positional arguments
+  sentencepiece::util::bytes result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto", 10, 10, swig_obj)) SWIG_fail;  // exactly 10 arguments are required (min 10, max 10)
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );  // argument 1: the SentencePieceProcessor instance
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {  // argument 2: the input text, accepted only when PyInputString reports it as available
+    const PyInputString ustring(swig_obj[1]);
+    if (!ustring.IsAvalable()) {
+      PyErr_SetString(PyExc_TypeError, "not a string");
+      SWIG_fail;
+    }
+    resultobj = ustring.input_type();  // NOTE(review): resultobj is reused to carry the input type until ReleaseResultObject() runs; both helpers are defined outside this view
+    arg2 = ustring.str();  // NOTE(review): arg2 is a view obtained from ustring, which ends its scope at the next brace — confirm the viewed bytes outlive it
+  }
+  ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);  // argument 3: int
+  if (!SWIG_IsOK(ecode3)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "3"" of type '" "int""'");
+  } 
+  arg3 = static_cast< int >(val3);
+  ecode4 = SWIG_AsVal_float(swig_obj[3], &val4);  // argument 4: float
+  if (!SWIG_IsOK(ecode4)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "4"" of type '" "float""'");
+  } 
+  arg4 = static_cast< float >(val4);
+  ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5);  // arguments 5-10: bool values, each converted and checked the same way
+  if (!SWIG_IsOK(ecode5)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "5"" of type '" "bool""'");
+  } 
+  arg5 = static_cast< bool >(val5);
+  ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6);
+  if (!SWIG_IsOK(ecode6)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "6"" of type '" "bool""'");
+  } 
+  arg6 = static_cast< bool >(val6);
+  ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
+  if (!SWIG_IsOK(ecode7)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "7"" of type '" "bool""'");
+  } 
+  arg7 = static_cast< bool >(val7);
+  ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8);
+  if (!SWIG_IsOK(ecode8)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "8"" of type '" "bool""'");
+  } 
+  arg8 = static_cast< bool >(val8);
+  ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9);
+  if (!SWIG_IsOK(ecode9)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "9"" of type '" "bool""'");
+  } 
+  arg9 = static_cast< bool >(val9);
+  ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10);
+  if (!SWIG_IsOK(ecode10)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "10"" of type '" "bool""'");
+  } 
+  arg10 = static_cast< bool >(val10);
+  {
+    try {  // the helper signals failure by throwing sentencepiece::util::Status
+      result = sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto((sentencepiece::SentencePieceProcessor const *)arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
+      ReleaseResultObject(resultobj);  // called only when the helper did not throw
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());  // status code is mapped to a SWIG error code; the message is status.ToString()
+    }
+  }
+  {
+    resultobj = MakePyOutputBytes(result);  // the util::bytes result becomes the Python return value
+  }
+  return resultobj;
+fail:  // error exit: NULL is returned to the interpreter
+  return NULL;
+}
+
+
+SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {  // SWIG wrapper: converts 10 Python arguments, calls the C++ helper and returns a wrapped ImmutableNBestSentencePieceText (NULL on failure).
+  PyObject *resultobj = 0;  // value returned to Python; also temporarily holds ustring.input_type() (see below)
+  sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
+  absl::string_view arg2 ;
+  int arg3 ;
+  float arg4 ;
+  bool arg5 ;
+  bool arg6 ;
+  bool arg7 ;
+  bool arg8 ;
+  bool arg9 ;
+  bool arg10 ;
+  void *argp1 = 0 ;
+  int res1 = 0 ;
+  int val3 ;
+  int ecode3 = 0 ;
+  float val4 ;
+  int ecode4 = 0 ;
+  bool val5 ;
+  int ecode5 = 0 ;
+  bool val6 ;
+  int ecode6 = 0 ;
+  bool val7 ;
+  int ecode7 = 0 ;
+  bool val8 ;
+  int ecode8 = 0 ;
+  bool val9 ;
+  int ecode9 = 0 ;
+  bool val10 ;
+  int ecode10 = 0 ;
+  PyObject *swig_obj[10] ;  // receives the unpacked positional arguments
+  sentencepiece::ImmutableNBestSentencePieceText result;
+  
+  if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto", 10, 10, swig_obj)) SWIG_fail;  // exactly 10 arguments are required (min 10, max 10)
+  res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 |  0 );  // argument 1: the SentencePieceProcessor instance
+  if (!SWIG_IsOK(res1)) {
+    SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); 
+  }
+  arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1);
+  {  // argument 2: the input text, accepted only when PyInputString reports it as available
+    const PyInputString ustring(swig_obj[1]);
+    if (!ustring.IsAvalable()) {
+      PyErr_SetString(PyExc_TypeError, "not a string");
+      SWIG_fail;
+    }
+    resultobj = ustring.input_type();  // NOTE(review): resultobj is reused to carry the input type until ReleaseResultObject() runs; both helpers are defined outside this view
+    arg2 = ustring.str();  // NOTE(review): arg2 is a view obtained from ustring, which ends its scope at the next brace — confirm the viewed bytes outlive it
+  }
+  ecode3 = SWIG_AsVal_int(swig_obj[2], &val3);  // argument 3: int
+  if (!SWIG_IsOK(ecode3)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "3"" of type '" "int""'");
+  } 
+  arg3 = static_cast< int >(val3);
+  ecode4 = SWIG_AsVal_float(swig_obj[3], &val4);  // argument 4: float
+  if (!SWIG_IsOK(ecode4)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "4"" of type '" "float""'");
+  } 
+  arg4 = static_cast< float >(val4);
+  ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5);  // arguments 5-10: bool values, each converted and checked the same way
+  if (!SWIG_IsOK(ecode5)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "5"" of type '" "bool""'");
+  } 
+  arg5 = static_cast< bool >(val5);
+  ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6);
+  if (!SWIG_IsOK(ecode6)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "6"" of type '" "bool""'");
+  } 
+  arg6 = static_cast< bool >(val6);
+  ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7);
+  if (!SWIG_IsOK(ecode7)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "7"" of type '" "bool""'");
+  } 
+  arg7 = static_cast< bool >(val7);
+  ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8);
+  if (!SWIG_IsOK(ecode8)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "8"" of type '" "bool""'");
+  } 
+  arg8 = static_cast< bool >(val8);
+  ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9);
+  if (!SWIG_IsOK(ecode9)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "9"" of type '" "bool""'");
+  } 
+  arg9 = static_cast< bool >(val9);
+  ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10);
+  if (!SWIG_IsOK(ecode10)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "10"" of type '" "bool""'");
+  } 
+  arg10 = static_cast< bool >(val10);
+  {
+    try {  // the helper signals failure by throwing sentencepiece::util::Status
+      result = sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10);
+      ReleaseResultObject(resultobj);  // called only when the helper did not throw
+    }
+    catch (const sentencepiece::util::Status &status) {
+      SWIG_exception(ToSwigError(status.code()), status.ToString().c_str());  // status code is mapped to a SWIG error code; the message is status.ToString()
+    }
+  }
+  resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableNBestSentencePieceText(static_cast< const sentencepiece::ImmutableNBestSentencePieceText& >(result))), SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_OWN |  0 );  // wraps a heap-allocated copy of result; created with SWIG_POINTER_OWN
+  return resultobj;
+fail:  // error exit: NULL is returned to the interpreter
+  return NULL;
+}
+
+
 SWIGINTERN PyObject *_wrap_SentencePieceProcessor__CalculateEntropy(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ;
@@ -7009,6 +8286,31 @@ SWIGINTERN PyObject *SentencePieceTrainer_swigregister(PyObject *SWIGUNUSEDPARM(
 
 static PyMethodDef SwigMethods[] = {
         { "SWIG_PyInstanceMethod_New", SWIG_PyInstanceMethod_New, METH_O, NULL},
+        { "new_ImmutableSentencePieceText_ImmutableSentencePiece", _wrap_new_ImmutableSentencePieceText_ImmutableSentencePiece, METH_NOARGS, NULL},
+        { "delete_ImmutableSentencePieceText_ImmutableSentencePiece", _wrap_delete_ImmutableSentencePieceText_ImmutableSentencePiece, METH_O, NULL},
+        { "ImmutableSentencePieceText_ImmutableSentencePiece_piece", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece_piece, METH_O, NULL},
+        { "ImmutableSentencePieceText_ImmutableSentencePiece_surface", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece_surface, METH_O, NULL},
+        { "ImmutableSentencePieceText_ImmutableSentencePiece_id", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece_id, METH_O, NULL},
+        { "ImmutableSentencePieceText_ImmutableSentencePiece_begin", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece_begin, METH_O, NULL},
+        { "ImmutableSentencePieceText_ImmutableSentencePiece_end", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece_end, METH_O, NULL},
+        { "ImmutableSentencePieceText_ImmutableSentencePiece_swigregister", ImmutableSentencePieceText_ImmutableSentencePiece_swigregister, METH_O, NULL},
+        { "ImmutableSentencePieceText_ImmutableSentencePiece_swiginit", ImmutableSentencePieceText_ImmutableSentencePiece_swiginit, METH_VARARGS, NULL},
+        { "new_ImmutableSentencePieceText", _wrap_new_ImmutableSentencePieceText, METH_NOARGS, NULL},
+        { "delete_ImmutableSentencePieceText", _wrap_delete_ImmutableSentencePieceText, METH_O, NULL},
+        { "ImmutableSentencePieceText_pieces_size", _wrap_ImmutableSentencePieceText_pieces_size, METH_O, NULL},
+        { "ImmutableSentencePieceText_text", _wrap_ImmutableSentencePieceText_text, METH_O, NULL},
+        { "ImmutableSentencePieceText_score", _wrap_ImmutableSentencePieceText_score, METH_O, NULL},
+        { "ImmutableSentencePieceText_SerializeAsString", _wrap_ImmutableSentencePieceText_SerializeAsString, METH_O, NULL},
+        { "ImmutableSentencePieceText_pieces", _wrap_ImmutableSentencePieceText_pieces, METH_VARARGS, NULL},
+        { "ImmutableSentencePieceText_swigregister", ImmutableSentencePieceText_swigregister, METH_O, NULL},
+        { "ImmutableSentencePieceText_swiginit", ImmutableSentencePieceText_swiginit, METH_VARARGS, NULL},
+        { "new_ImmutableNBestSentencePieceText", _wrap_new_ImmutableNBestSentencePieceText, METH_NOARGS, NULL},
+        { "delete_ImmutableNBestSentencePieceText", _wrap_delete_ImmutableNBestSentencePieceText, METH_O, NULL},
+        { "ImmutableNBestSentencePieceText_nbests_size", _wrap_ImmutableNBestSentencePieceText_nbests_size, METH_O, NULL},
+        { "ImmutableNBestSentencePieceText_SerializeAsString", _wrap_ImmutableNBestSentencePieceText_SerializeAsString, METH_O, NULL},
+        { "ImmutableNBestSentencePieceText_nbests", _wrap_ImmutableNBestSentencePieceText_nbests, METH_VARARGS, NULL},
+        { "ImmutableNBestSentencePieceText_swigregister", ImmutableNBestSentencePieceText_swigregister, METH_O, NULL},
+        { "ImmutableNBestSentencePieceText_swiginit", ImmutableNBestSentencePieceText_swiginit, METH_VARARGS, NULL},
         { "new_SentencePieceProcessor", _wrap_new_SentencePieceProcessor, METH_NOARGS, NULL},
         { "delete_SentencePieceProcessor", _wrap_delete_SentencePieceProcessor, METH_O, NULL},
         { "SentencePieceProcessor_LoadFromSerializedProto", _wrap_SentencePieceProcessor_LoadFromSerializedProto, METH_VARARGS, NULL},
@@ -7017,8 +8319,6 @@ static PyMethodDef SwigMethods[] = {
         { "SentencePieceProcessor_SetVocabulary", _wrap_SentencePieceProcessor_SetVocabulary, METH_VARARGS, NULL},
         { "SentencePieceProcessor_ResetVocabulary", _wrap_SentencePieceProcessor_ResetVocabulary, METH_O, NULL},
         { "SentencePieceProcessor_LoadVocabulary", _wrap_SentencePieceProcessor_LoadVocabulary, METH_VARARGS, NULL},
-        { "SentencePieceProcessor_SampleEncodeAndScoreAsPieces", _wrap_SentencePieceProcessor_SampleEncodeAndScoreAsPieces, METH_VARARGS, NULL},
-        { "SentencePieceProcessor_SampleEncodeAndScoreAsIds", _wrap_SentencePieceProcessor_SampleEncodeAndScoreAsIds, METH_VARARGS, NULL},
         { "SentencePieceProcessor_CalculateEntropy", _wrap_SentencePieceProcessor_CalculateEntropy, METH_VARARGS, NULL},
         { "SentencePieceProcessor_GetPieceSize", _wrap_SentencePieceProcessor_GetPieceSize, METH_O, NULL},
         { "SentencePieceProcessor_PieceToId", _wrap_SentencePieceProcessor_PieceToId, METH_VARARGS, NULL},
@@ -7037,22 +8337,30 @@ static PyMethodDef SwigMethods[] = {
         { "SentencePieceProcessor__EncodeAsIds", _wrap_SentencePieceProcessor__EncodeAsIds, METH_VARARGS, NULL},
         { "SentencePieceProcessor__EncodeAsPieces", _wrap_SentencePieceProcessor__EncodeAsPieces, METH_VARARGS, NULL},
         { "SentencePieceProcessor__EncodeAsSerializedProto", _wrap_SentencePieceProcessor__EncodeAsSerializedProto, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__EncodeAsImmutableProto", _wrap_SentencePieceProcessor__EncodeAsImmutableProto, METH_VARARGS, NULL},
         { "SentencePieceProcessor__EncodeAsIdsBatch", _wrap_SentencePieceProcessor__EncodeAsIdsBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor__EncodeAsPiecesBatch", _wrap_SentencePieceProcessor__EncodeAsPiecesBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor__EncodeAsSerializedProtoBatch", _wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__EncodeAsImmutableProtoBatch", _wrap_SentencePieceProcessor__EncodeAsImmutableProtoBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodeIds", _wrap_SentencePieceProcessor__DecodeIds, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodePieces", _wrap_SentencePieceProcessor__DecodePieces, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodeIdsAsSerializedProto", _wrap_SentencePieceProcessor__DecodeIdsAsSerializedProto, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodePiecesAsSerializedProto", _wrap_SentencePieceProcessor__DecodePiecesAsSerializedProto, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__DecodeIdsAsImmutableProto", _wrap_SentencePieceProcessor__DecodeIdsAsImmutableProto, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__DecodePiecesAsImmutableProto", _wrap_SentencePieceProcessor__DecodePiecesAsImmutableProto, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodeIdsBatch", _wrap_SentencePieceProcessor__DecodeIdsBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch", _wrap_SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodePiecesBatch", _wrap_SentencePieceProcessor__DecodePiecesBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch", _wrap_SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch", _wrap_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor__NBestEncodeAsIds", _wrap_SentencePieceProcessor__NBestEncodeAsIds, METH_VARARGS, NULL},
         { "SentencePieceProcessor__NBestEncodeAsPieces", _wrap_SentencePieceProcessor__NBestEncodeAsPieces, METH_VARARGS, NULL},
         { "SentencePieceProcessor__NBestEncodeAsSerializedProto", _wrap_SentencePieceProcessor__NBestEncodeAsSerializedProto, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__NBestEncodeAsImmutableProto", _wrap_SentencePieceProcessor__NBestEncodeAsImmutableProto, METH_VARARGS, NULL},
         { "SentencePieceProcessor__SampleEncodeAndScoreAsIds", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsIds, METH_VARARGS, NULL},
         { "SentencePieceProcessor__SampleEncodeAndScoreAsPieces", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsPieces, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto, METH_VARARGS, NULL},
+        { "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto, METH_VARARGS, NULL},
         { "SentencePieceProcessor__CalculateEntropy", _wrap_SentencePieceProcessor__CalculateEntropy, METH_VARARGS, NULL},
         { "SentencePieceProcessor__CalculateEntropyBatch", _wrap_SentencePieceProcessor__CalculateEntropyBatch, METH_VARARGS, NULL},
         { "SentencePieceProcessor_swigregister", SentencePieceProcessor_swigregister, METH_O, NULL},
@@ -7076,6 +8384,9 @@ static PyMethodDef SwigMethods_proxydocs[] = {
 
 static swig_type_info _swigt__p_char = {"_p_char", "char *", 0, 0, (void*)0, 0};
 static swig_type_info _swigt__p_float = {"_p_float", "float *", 0, 0, (void*)0, 0};
+static swig_type_info _swigt__p_sentencepiece__ImmutableNBestSentencePieceText = {"_p_sentencepiece__ImmutableNBestSentencePieceText", "sentencepiece::ImmutableNBestSentencePieceText *", 0, 0, (void*)0, 0};
+static swig_type_info _swigt__p_sentencepiece__ImmutableSentencePieceText = {"_p_sentencepiece__ImmutableSentencePieceText", "sentencepiece::ImmutableSentencePieceText *", 0, 0, (void*)0, 0};
+static swig_type_info _swigt__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece = {"_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece", "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *", 0, 0, (void*)0, 0};
 static swig_type_info _swigt__p_sentencepiece__SentenceIterator = {"_p_sentencepiece__SentenceIterator", "sentencepiece::SentenceIterator *", 0, 0, (void*)0, 0};
 static swig_type_info _swigt__p_sentencepiece__SentencePieceProcessor = {"_p_sentencepiece__SentencePieceProcessor", "sentencepiece::SentencePieceProcessor *", 0, 0, (void*)0, 0};
 static swig_type_info _swigt__p_sentencepiece__SentencePieceTrainer = {"_p_sentencepiece__SentencePieceTrainer", "sentencepiece::SentencePieceTrainer *", 0, 0, (void*)0, 0};
@@ -7089,6 +8400,9 @@ static swig_type_info _swigt__p_std__vectorT_std__vectorT_int_t_t = {"_p_std__ve
 static swig_type_info *swig_type_initial[] = {
   &_swigt__p_char,
   &_swigt__p_float,
+  &_swigt__p_sentencepiece__ImmutableNBestSentencePieceText,
+  &_swigt__p_sentencepiece__ImmutableSentencePieceText,
+  &_swigt__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece,
   &_swigt__p_sentencepiece__SentenceIterator,
   &_swigt__p_sentencepiece__SentencePieceProcessor,
   &_swigt__p_sentencepiece__SentencePieceTrainer,
@@ -7102,6 +8416,9 @@ static swig_type_info *swig_type_initial[] = {
 
 static swig_cast_info _swigc__p_char[] = {  {&_swigt__p_char, 0, 0, 0},{0, 0, 0, 0}};
 static swig_cast_info _swigc__p_float[] = {  {&_swigt__p_float, 0, 0, 0},{0, 0, 0, 0}};
+static swig_cast_info _swigc__p_sentencepiece__ImmutableNBestSentencePieceText[] = {  {&_swigt__p_sentencepiece__ImmutableNBestSentencePieceText, 0, 0, 0},{0, 0, 0, 0}};
+static swig_cast_info _swigc__p_sentencepiece__ImmutableSentencePieceText[] = {  {&_swigt__p_sentencepiece__ImmutableSentencePieceText, 0, 0, 0},{0, 0, 0, 0}};
+static swig_cast_info _swigc__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece[] = {  {&_swigt__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0, 0, 0},{0, 0, 0, 0}};
 static swig_cast_info _swigc__p_sentencepiece__SentenceIterator[] = {  {&_swigt__p_sentencepiece__SentenceIterator, 0, 0, 0},{0, 0, 0, 0}};
 static swig_cast_info _swigc__p_sentencepiece__SentencePieceProcessor[] = {  {&_swigt__p_sentencepiece__SentencePieceProcessor, 0, 0, 0},{0, 0, 0, 0}};
 static swig_cast_info _swigc__p_sentencepiece__SentencePieceTrainer[] = {  {&_swigt__p_sentencepiece__SentencePieceTrainer, 0, 0, 0},{0, 0, 0, 0}};
@@ -7115,6 +8432,9 @@ static swig_cast_info _swigc__p_std__vectorT_std__vectorT_int_t_t[] = {  {&_swig
 static swig_cast_info *swig_cast_initial[] = {
   _swigc__p_char,
   _swigc__p_float,
+  _swigc__p_sentencepiece__ImmutableNBestSentencePieceText,
+  _swigc__p_sentencepiece__ImmutableSentencePieceText,
+  _swigc__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece,
   _swigc__p_sentencepiece__SentenceIterator,
   _swigc__p_sentencepiece__SentencePieceProcessor,
   _swigc__p_sentencepiece__SentencePieceTrainer,
index 6c48bcd9b55e524235d4ff54fbe0ad40294dd20f..2f2c84aec542abfcb0aaf6469819b531486fa1fa 100755 (executable)
@@ -287,16 +287,44 @@ class TestSentencepieceProcessor(unittest.TestCase):
     ids2 = self.sp_.EncodeAsIds(text2)
     pieces = self.sp_.EncodeAsPieces(text)
     pieces2 = self.sp_.EncodeAsPieces(text2)
-    protos = self.sp_.EncodeAsSerializedProto(text)
-    proto2 = self.sp_.EncodeAsSerializedProto(text2)
+    sprotos = self.sp_.EncodeAsSerializedProto(text)
+    sproto2 = self.sp_.EncodeAsSerializedProto(text2)
+    iprotos = self.sp_.EncodeAsImmutableProto(text)
+    iprotos2 = self.sp_.EncodeAsImmutableProto(text2)
 
     self.assertEqual(sp.encode(text, out_type=int), ids)
     self.assertEqual(sp.encode(text, out_type=str), pieces)
-    self.assertEqual(sp.encode(text, out_type='proto'), protos)
+    self.assertEqual(sp.encode(text, out_type='serialized_proto'), sprotos)
+    self.assertEqual(sp.encode(text, out_type='immutable_proto'), iprotos)
 
     self.assertEqual(sp.encode([text], out_type=int), [ids])
     self.assertEqual(sp.encode([text], out_type=str), [pieces])
-    self.assertEqual(sp.encode([text], out_type='proto'), [protos])
+    self.assertEqual(sp.encode([text], out_type='serialized_proto'), [sprotos])
+    self.assertEqual(sp.encode([text], out_type='immutable_proto'), [iprotos])
+
+    self.assertEqual(len(iprotos), len(pieces))
+    self.assertEqual(len(iprotos), len(ids))
+    self.assertEqual(iprotos.text(), text)
+
+    self.assertEqual(len(iprotos2), len(pieces2))
+    self.assertEqual(len(iprotos2), len(ids2))
+    self.assertEqual(iprotos2.text(), text2)
+
+    for i in range(len(iprotos)):
+      self.assertEqual(ids[i], iprotos.pieces(i).id())
+      self.assertEqual(pieces[i], iprotos.pieces(i).piece())
+
+    for i, piece in enumerate(iprotos):
+      self.assertEqual(ids[i], piece.id())
+      self.assertEqual(pieces[i], piece.piece())
+
+    for i in range(len(iprotos2)):
+      self.assertEqual(ids2[i], iprotos2.pieces(i).id())
+      self.assertEqual(pieces2[i], iprotos2.pieces(i).piece())
+
+    for i, piece in enumerate(iprotos2):
+      self.assertEqual(ids2[i], piece.id())
+      self.assertEqual(pieces2[i], piece.piece())
 
     detok_ids = self.sp_.DecodeIds(ids)
     detok_pieces = self.sp_.DecodePieces(pieces)
@@ -464,19 +492,29 @@ class TestSentencepieceProcessor(unittest.TestCase):
     self.assertEqual(d1, d4)
     self.assertEqual(d1, d5)
 
-    r1 = sp.encode(texts, out_type='proto', num_threads=None)
-    r2 = sp.encode(texts, out_type='proto', num_threads=1)
-    r3 = sp.encode(texts, out_type='proto', num_threads=-1)
-    r4 = sp.encode(texts, out_type='proto', num_threads=8)
-    r5 = [sp.encode(s, out_type='proto') for s in texts]
+    r1 = sp.encode(texts, out_type='serialized_proto', num_threads=None)
+    r2 = sp.encode(texts, out_type='serialized_proto', num_threads=1)
+    r3 = sp.encode(texts, out_type='serialized_proto', num_threads=-1)
+    r4 = sp.encode(texts, out_type='serialized_proto', num_threads=8)
+    r5 = [sp.encode(s, out_type='serialized_proto') for s in texts]
+    self.assertEqual(r1, r2)
+    self.assertEqual(r1, r3)
+    self.assertEqual(r1, r4)
+    self.assertEqual(r1, r5)
+
+    r1 = sp.encode(texts, out_type='immutable_proto', num_threads=None)
+    r2 = sp.encode(texts, out_type='immutable_proto', num_threads=1)
+    r3 = sp.encode(texts, out_type='immutable_proto', num_threads=-1)
+    r4 = sp.encode(texts, out_type='immutable_proto', num_threads=8)
+    r5 = [sp.encode(s, out_type='immutable_proto') for s in texts]
     self.assertEqual(r1, r2)
     self.assertEqual(r1, r3)
     self.assertEqual(r1, r4)
     self.assertEqual(r1, r5)
 
-    e1 = sp.calculate_entropy(texts, theta=1.0, num_threads=10)
-    e2 = sp.CalculateEntropy(texts, theta=1.0, num_threads=10)
-    e3 = [sp.calculate_entropy(s, theta=1.0) for s in texts]
+    e1 = sp.calculate_entropy(texts, alpha=1.0, num_threads=10)
+    e2 = sp.CalculateEntropy(texts, alpha=1.0, num_threads=10)
+    e3 = [sp.calculate_entropy(s, alpha=1.0) for s in texts]
     self.assertEqual(e1, e2)
     self.assertEqual(e1, e3)
 
index 805e0f9335482af1fad58c922a2670d9d51d1b94..482a45bf1677f75fadbf543a6a37c5d1229308d8 100644 (file)
@@ -54,65 +54,70 @@ std::vector<absl::string_view> ToPieceArray(const std::vector<std::string> &v) {
   for (int i = 0; i < v.size(); ++i) out[i] = v[i];
   return out;
 }
+
 }  // namespace
 
-ImmutableSentencePieceText::ImmutableSentencePieceText() {}
-ImmutableSentencePieceText::~ImmutableSentencePieceText() {}
+ImmutableSentencePieceText::ImmutableSentencePieceText()
+    : spt_(&SentencePieceText::default_instance()) {}
 
 ImmutableSentencePieceText::ImmutableSentencePieceText(
     const SentencePieceText &spt)
     : spt_(&spt) {}
 
-ImmutableSentencePieceText::ImmutableSentencePiece::ImmutableSentencePiece(
-    const SentencePieceText_SentencePiece &sp)
+ImmutableSentencePieceText::~ImmutableSentencePieceText() {}
+
+ImmutableSentencePieceText_ImmutableSentencePiece::
+    ImmutableSentencePieceText_ImmutableSentencePiece()
+    : sp_(&SentencePieceText_SentencePiece::default_instance()) {}
+
+ImmutableSentencePieceText_ImmutableSentencePiece::
+    ImmutableSentencePieceText_ImmutableSentencePiece(
+        const SentencePieceText_SentencePiece &sp)
     : sp_(&sp) {}
 
-const std::string &ImmutableSentencePieceText::ImmutableSentencePiece::piece()
+const std::string &ImmutableSentencePieceText_ImmutableSentencePiece::piece()
     const {
   return sp_->piece();
 }
 
-const std::string &ImmutableSentencePieceText::ImmutableSentencePiece::surface()
+const std::string &ImmutableSentencePieceText_ImmutableSentencePiece::surface()
     const {
   return sp_->surface();
 }
 
-uint32_t ImmutableSentencePieceText::ImmutableSentencePiece::id() const {
+uint32_t ImmutableSentencePieceText_ImmutableSentencePiece::id() const {
   return sp_->id();
 }
 
-uint32_t ImmutableSentencePieceText::ImmutableSentencePiece::begin() const {
+uint32_t ImmutableSentencePieceText_ImmutableSentencePiece::begin() const {
   return sp_->begin();
 }
 
-uint32_t ImmutableSentencePieceText::ImmutableSentencePiece::end() const {
+uint32_t ImmutableSentencePieceText_ImmutableSentencePiece::end() const {
   return sp_->end();
 }
 
-std::vector<ImmutableSentencePieceText::ImmutableSentencePiece>
+std::vector<ImmutableSentencePieceText_ImmutableSentencePiece>
 ImmutableSentencePieceText::pieces() const {
-  std::vector<ImmutableSentencePieceText::ImmutableSentencePiece> pieces;
-  if (spt_ == nullptr) return pieces;
-  pieces.reserve(spt_->pieces_size());
+  std::vector<ImmutableSentencePieceText_ImmutableSentencePiece> pieces(
+      spt_->pieces_size());
   for (int i = 0; i < spt_->pieces_size(); ++i)
-    pieces[i] = ImmutableSentencePiece(spt_->pieces(i));
+    pieces[i] =
+        ImmutableSentencePieceText_ImmutableSentencePiece(spt_->pieces(i));
   return pieces;
 }
 
 size_t ImmutableSentencePieceText::pieces_size() const {
-  return spt_ ? spt_->pieces_size() : 0;
+  return spt_->pieces_size();
 }
 
-ImmutableSentencePieceText::ImmutableSentencePiece
+ImmutableSentencePieceText_ImmutableSentencePiece
 ImmutableSentencePieceText::pieces(int index) const {
-  return ImmutableSentencePieceText::ImmutableSentencePiece(
-      spt_->pieces(index));
+  return ImmutableSentencePieceText_ImmutableSentencePiece(spt_->pieces(index));
 }
 
 const std::string &ImmutableSentencePieceText::text() const {
-  if (spt_) return spt_->text();
-  static std::string *kEmptyString = new std::string();
-  return *kEmptyString;
+  return spt_->text();
 }
 
 float ImmutableSentencePieceText::score() const {
@@ -127,8 +132,8 @@ SentencePieceText *ImmutableSentencePieceText::mutable_proto() {
   return rep_.get();
 }
 
-std::string ImmutableSentencePieceText::SerializeAsString() const {
-  return spt_ ? spt_->SerializeAsString() : "";
+util::bytes ImmutableSentencePieceText::SerializeAsString() const {
+  return spt_->SerializeAsString();
 }
 
 ImmutableNBestSentencePieceText::ImmutableNBestSentencePieceText() {}
@@ -145,9 +150,8 @@ ImmutableSentencePieceText ImmutableNBestSentencePieceText::nbests(
 
 std::vector<ImmutableSentencePieceText>
 ImmutableNBestSentencePieceText::nbests() const {
-  std::vector<ImmutableSentencePieceText> nbests;
-  if (rep_ == nullptr) return nbests;
-  nbests.reserve(rep_->nbests_size());
+  if (rep_ == nullptr) return {};
+  std::vector<ImmutableSentencePieceText> nbests(rep_->nbests_size());
   for (int i = 0; i < rep_->nbests_size(); ++i)
     nbests[i] = ImmutableSentencePieceText(rep_->nbests(i));
   return nbests;
@@ -160,7 +164,7 @@ NBestSentencePieceText *ImmutableNBestSentencePieceText::mutable_proto() {
   return rep_.get();
 }
 
-std::string ImmutableNBestSentencePieceText::SerializeAsString() const {
+util::bytes ImmutableNBestSentencePieceText::SerializeAsString() const {
   return rep_ ? rep_->SerializeAsString() : "";
 }
 
@@ -1044,8 +1048,35 @@ std::string SentencePieceProcessor::serialized_model_proto() const {
 // std::random_device.
 void SetRandomGeneratorSeed(unsigned int seed);
 
-namespace io {
+void ConvertToUnicodeSpans(SentencePieceText *spt) {
+  if (spt == nullptr) return;
+  // utf8_to_unicode[b]: index of the Unicode character covering byte b.
+  std::vector<int> utf8_to_unicode(spt->text().size() + 1, 0);
+  absl::string_view str = spt->text();
+  size_t prev = 0;
+  int ulen = 0;
+  while (!str.empty()) {
+    // Clamp so a truncated trailing sequence cannot overrun the buffers.
+    const size_t mblen =
+        std::min<size_t>(string_util::OneCharLen(str.data()), str.size());
+    for (size_t i = prev; i < prev + mblen; ++i) utf8_to_unicode[i] = ulen;
+    ++ulen;
+    prev += mblen;
+    str.remove_prefix(mblen);
+  }
+  utf8_to_unicode[prev] = ulen;
+  // Byte offsets are clamped into the valid index range of the table.
+  auto clip = [&](int s) {
+    return std::min<int>(std::max<int>(0, s), utf8_to_unicode.size() - 1);
+  };
 
+  for (auto &piece : *(spt->mutable_pieces())) {
+    piece.set_begin(utf8_to_unicode[clip(piece.begin())]);
+    piece.set_end(utf8_to_unicode[clip(piece.end())]);
+  }
+}
+
+namespace io {
 util::Status LoadModelProto(absl::string_view filename,
                             ModelProto *model_proto) {
   if (filename.empty()) {
index 8124c5923591eae7523e9caccd44c3264364fdfd..b7fae6a3defe6462b5de3a0894ad268ed8806379 100644 (file)
@@ -157,35 +157,39 @@ class SentencePieceText_SentencePiece;
 // This wrapper only allows an immutable access to the proto and
 // hides the actual implementation of protobuf.
 // See sentencepiece.proto for the details of this class.
+class ImmutableSentencePieceText_ImmutableSentencePiece {
+ public:
+  ImmutableSentencePieceText_ImmutableSentencePiece();
+  ~ImmutableSentencePieceText_ImmutableSentencePiece() = default;
+
+  const std::string &piece() const;
+  const std::string &surface() const;
+  uint32_t id() const;
+  uint32_t begin() const;
+  uint32_t end() const;
+
+  friend class ImmutableSentencePieceText;
+
+ private:
+  explicit ImmutableSentencePieceText_ImmutableSentencePiece(
+      const SentencePieceText_SentencePiece &sp);
+  const SentencePieceText_SentencePiece *sp_ = nullptr;
+};
+
 class ImmutableSentencePieceText {
  public:
   ImmutableSentencePieceText();
   virtual ~ImmutableSentencePieceText();
 
-  class ImmutableSentencePiece {
-   public:
-    ~ImmutableSentencePiece() = default;
-    const std::string &piece() const;
-    const std::string &surface() const;
-    uint32_t id() const;
-    uint32_t begin() const;
-    uint32_t end() const;
+  std::vector<ImmutableSentencePieceText_ImmutableSentencePiece> pieces() const;
 
-    friend class ImmutableSentencePieceText;
-
-   private:
-    ImmutableSentencePiece() = default;
-    explicit ImmutableSentencePiece(const SentencePieceText_SentencePiece &sp);
-    const SentencePieceText_SentencePiece *sp_ = nullptr;
-  };
-
-  std::vector<ImmutableSentencePiece> pieces() const;
   size_t pieces_size() const;
-  ImmutableSentencePiece pieces(int index) const;
+  ImmutableSentencePieceText_ImmutableSentencePiece pieces(int index) const;
+
   const std::string &text() const;
   float score() const;
 
-  std::string SerializeAsString() const;
+  util::bytes SerializeAsString() const;
 
   // Returns the actual mutable proto.
   // Do not use this outside of SentencePieceProcessor, as
@@ -214,7 +218,7 @@ class ImmutableNBestSentencePieceText {
   size_t nbests_size() const;
   ImmutableSentencePieceText nbests(int index) const;
 
-  std::string SerializeAsString() const;
+  util::bytes SerializeAsString() const;
 
   // Returns the actual mutable proto.
   // Do not use this outside of SentencePieceProcessor, as
@@ -398,7 +402,7 @@ class SentencePieceProcessor {
                                     float alpha, SentencePieceText *spt) const;
 
   virtual util::Status SampleEncodeAndScore(
-      absl::string_view input, int samples, float alpha, bool wor,
+      absl::string_view input, int num_samples, float alpha, bool wor,
       bool include_best, NBestSentencePieceText *samples_spt) const;
 
   // DEPRECATED: Remove this API and use std::vector<std::string_view>
@@ -534,11 +538,11 @@ class SentencePieceProcessor {
   }
 
   virtual util::bytes SampleEncodeAndScoreAsSerializedProto(
-      absl::string_view input, int samples, float alpha, bool wor,
-      bool include_best, int nbest_size) const {
+      absl::string_view input, int num_samples, float alpha, bool wor,
+      bool include_best) const {
     DEFINE_SPP_SERIALIZED_PROTO_IMPL(SampleEncodeAndScore,
                                      ImmutableNBestSentencePieceText, input,
-                                     samples, alpha, wor, include_best);
+                                     num_samples, alpha, wor, include_best);
   }
 
   // TODO(taku): Remove this API and use std::vector<std::string_view>
@@ -579,11 +583,11 @@ class SentencePieceProcessor {
   }
 
   virtual ImmutableNBestSentencePieceText SampleEncodeAndScoreAsImmutableProto(
-      absl::string_view input, int samples, float alpha, bool wor,
-      bool include_best, int nbest_size) const {
+      absl::string_view input, int num_samples, float alpha, bool wor,
+      bool include_best) const {
     DEFINE_SPP_IMMUTABLE_PROTO_IMPL(SampleEncodeAndScore,
                                     ImmutableNBestSentencePieceText, input,
-                                    samples, alpha, wor, include_best);
+                                    num_samples, alpha, wor, include_best);
   }
 
   // TODO(taku): Remove this API and use std::vector<std::string_view>
@@ -703,6 +707,9 @@ class SentencePieceProcessor {
 // std::random_device.
 void SetRandomGeneratorSeed(unsigned int seed);
 
+// Converts each piece's UTF-8 byte span into a Unicode character span.
+void ConvertToUnicodeSpans(SentencePieceText *spt);
+
 #ifndef SWIG
 // IO related functions to absorb model formats.
 namespace io {
index ed651f7c83e97d6eface33bc271618ab96b0ad9e..ff55aeb1cf3e17589b3a142e8fd32c69d5a25837 100644 (file)
@@ -1564,6 +1564,10 @@ TEST(SentencePieceProcessorTest, VocabularyTest) {
 
 TEST(SentencePieceProcessorTest, ImmutableSentencePieceTextTest) {
   ImmutableSentencePieceText spt;
+  EXPECT_TRUE(spt.text().empty());
+  EXPECT_EQ(spt.score(), 0.0);
+  EXPECT_TRUE(spt.SerializeAsString().empty());
+
   auto *v = spt.mutable_proto();
 
   v->set_text("hello world");
@@ -1586,52 +1590,123 @@ TEST(SentencePieceProcessorTest, ImmutableSentencePieceTextTest) {
     EXPECT_EQ(v->pieces(i).end(), spt.pieces(i).end());
   }
 
-  int n = 0;
-  for (auto &p : spt.pieces()) {
-    EXPECT_EQ(v->pieces(n).surface(), p.surface());
-    EXPECT_EQ(v->pieces(n).piece(), p.piece());
-    EXPECT_EQ(v->pieces(n).id(), p.id());
-    EXPECT_EQ(v->pieces(n).begin(), p.begin());
-    EXPECT_EQ(v->pieces(n).end(), p.end());
-    ++n;
-  }
-
-  EXPECT_EQ(v->text(), spt.text());
-  EXPECT_EQ(v->score(), spt.score());
-  EXPECT_EQ(v->SerializeAsString(), spt.SerializeAsString());
+  auto check_proto = [&v](const ImmutableSentencePieceText &s) {
+    int n = 0;
+    for (auto &p : s.pieces()) {
+      EXPECT_EQ(v->pieces(n).surface(), p.surface());
+      EXPECT_EQ(v->pieces(n).piece(), p.piece());
+      EXPECT_EQ(v->pieces(n).id(), p.id());
+      EXPECT_EQ(v->pieces(n).begin(), p.begin());
+      EXPECT_EQ(v->pieces(n).end(), p.end());
+      ++n;
+    }
+    EXPECT_EQ(v->text(), s.text());
+    EXPECT_EQ(v->score(), s.score());
+    EXPECT_EQ(v->SerializeAsString(), s.SerializeAsString());
+  };
 
   // test copy.
-  auto spt2 = spt;
-  EXPECT_EQ(spt2.pieces_size(), spt.pieces_size());
-  for (int i = 0; i < spt.pieces_size(); ++i) {
-    EXPECT_EQ(spt2.pieces(i).surface(), spt.pieces(i).surface());
-    EXPECT_EQ(spt2.pieces(i).piece(), spt.pieces(i).piece());
-    EXPECT_EQ(spt2.pieces(i).id(), spt.pieces(i).id());
-    EXPECT_EQ(spt2.pieces(i).begin(), spt.pieces(i).begin());
-    EXPECT_EQ(spt2.pieces(i).end(), spt.pieces(i).end());
-  }
+  const auto spt2 = spt;
+  check_proto(spt2);
+
+  // test copy construction (direct-initialization).
+  const ImmutableSentencePieceText spt3(spt);
+  check_proto(spt3);
+
+  // default piece.
+  const ImmutableSentencePieceText_ImmutableSentencePiece piece;
+  EXPECT_TRUE(piece.surface().empty());
+  EXPECT_TRUE(piece.piece().empty());
+  EXPECT_EQ(piece.begin(), 0);
+  EXPECT_EQ(piece.end(), 0);
+  EXPECT_EQ(piece.id(), 0);
 }
 
 TEST(SentencePieceProcessorTest, ImmutableNBestSentencePieceTextTest) {
   ImmutableNBestSentencePieceText spt;
+  EXPECT_EQ(spt.nbests_size(), 0);
+  EXPECT_TRUE(spt.SerializeAsString().empty());
+
   auto *v = spt.mutable_proto();
+
   for (int i = 0; i < 10; ++i) {
     auto *p = v->add_nbests();
     p->set_text(absl::StrCat("text_", i));
     p->set_score(2.0 * i);
   }
 
-  EXPECT_EQ(v->nbests_size(), spt.nbests_size());
-  for (int i = 0; i < v->nbests_size(); ++i) {
-    EXPECT_EQ(v->nbests(i).text(), spt.nbests(i).text());
-    EXPECT_EQ(v->nbests(i).score(), spt.nbests(i).score());
-  }
-  EXPECT_EQ(v->SerializeAsString(), spt.SerializeAsString());
+  auto check_proto = [&v](const ImmutableNBestSentencePieceText &s) {
+    EXPECT_EQ(v->nbests_size(), s.nbests_size());
+    for (int i = 0; i < v->nbests_size(); ++i) {
+      EXPECT_EQ(v->nbests(i).text(), s.nbests(i).text());
+      EXPECT_EQ(v->nbests(i).score(), s.nbests(i).score());
+    }
+    EXPECT_EQ(v->SerializeAsString(), s.SerializeAsString());
+  };
+
+  check_proto(spt);
 
   // test copy.
-  auto spt2 = spt;
-  EXPECT_EQ(spt2.nbests_size(), spt.nbests_size());
-  EXPECT_EQ(spt2.SerializeAsString(), spt.SerializeAsString());
+  const auto spt2 = spt;
+  check_proto(spt2);
+
+  // test copy construction (direct-initialization).
+  const ImmutableNBestSentencePieceText spt3(spt);
+  check_proto(spt3);
+}
+
+TEST(SentencePieceProcessorTest, ConvertToUnicodeSpansTest) {
+  auto make_spt = [&](const std::vector<std::string> &tokens) {
+    SentencePieceText spt;
+    int prev = 0;
+    std::string text;
+    for (const auto &tok : tokens) {
+      auto *piece = spt.add_pieces();
+      piece->set_surface(tok);
+      piece->set_piece(tok);
+      piece->set_begin(prev);
+      piece->set_end(prev + tok.size());
+      prev += tok.size();
+      text += tok;
+    }
+    spt.set_text(text);
+    ConvertToUnicodeSpans(&spt);
+    return spt;
+  };
+
+  {
+    const auto spt = make_spt({"hello", "_world", "."});
+    EXPECT_EQ(spt.pieces_size(), 3);
+    EXPECT_EQ(spt.pieces(0).begin(), 0);
+    EXPECT_EQ(spt.pieces(0).end(), 5);
+    EXPECT_EQ(spt.pieces(1).begin(), 5);
+    EXPECT_EQ(spt.pieces(1).end(), 11);
+    EXPECT_EQ(spt.pieces(2).begin(), 11);
+    EXPECT_EQ(spt.pieces(2).end(), 12);
+  }
+
+  {
+    const auto spt = make_spt({"これは", "test", "です"});
+    EXPECT_EQ(spt.pieces_size(), 3);
+    EXPECT_EQ(spt.pieces(0).begin(), 0);
+    EXPECT_EQ(spt.pieces(0).end(), 3);
+    EXPECT_EQ(spt.pieces(1).begin(), 3);
+    EXPECT_EQ(spt.pieces(1).end(), 7);
+
+    EXPECT_EQ(spt.pieces(2).begin(), 7);
+    EXPECT_EQ(spt.pieces(2).end(), 9);
+  }
+
+  {
+    const auto spt = make_spt({"いABは", "にほCD", "へと"});
+    EXPECT_EQ(spt.pieces_size(), 3);
+    EXPECT_EQ(spt.pieces(0).begin(), 0);
+    EXPECT_EQ(spt.pieces(0).end(), 4);
+    EXPECT_EQ(spt.pieces(1).begin(), 4);
+    EXPECT_EQ(spt.pieces(1).end(), 8);
+    EXPECT_EQ(spt.pieces(2).begin(), 8);
+    EXPECT_EQ(spt.pieces(2).end(), 10);
+  }
 }
 
 }  // namespace sentencepiece