option(SPM_USE_BUILTIN_PROTOBUF "Use built-in protobuf" ON)
option(SPM_USE_EXTERNAL_ABSL "Use external abseil" OFF)
option(SPM_ENABLE_MSVC_MT_BUILD, "Use /MT flag in MSVC build" OFF)
+# option() can only declare ON/OFF switches, and CMake separates arguments with
+# whitespace, so a comma after the name would become part of the variable name.
+# A free-form processor name is therefore declared as a STRING cache entry; an
+# empty value leaves CMAKE_SYSTEM_PROCESSOR unchanged.
+set(SPM_CROSS_SYSTEM_PROCESSOR "" CACHE STRING "Override system processor")
+
+if (SPM_CROSS_SYSTEM_PROCESSOR)
+ set(CMAKE_SYSTEM_PROCESSOR "${SPM_CROSS_SYSTEM_PROCESSOR}")
+endif()
# Disable shared build on windows
if(WIN32)
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "mips") OR
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "m68k") OR
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc") OR
+ (${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc") OR
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch") OR
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "sh4"))
find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1)
endif()
endif()
+
if (SPM_ENABLE_SHARED)
add_library(sentencepiece SHARED ${SPM_SRCS})
add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
std::string blob;
blob.append(string_util::EncodePOD<uint32>(trie_blob.size()));
blob.append(trie_blob.data(), trie_blob.size());
- blob.append(normalized.data(), normalized.size());
#ifdef IS_BIG_ENDIAN
uint32 *data = reinterpret_cast<uint32 *>(const_cast<char *>(blob.data()));
- for (int i = 0; i <= trie_blob.size() / 4; ++i)
- data[i] = util::Swap32(data[i]);
+ for (int i = 0; i < blob.size() / 4; ++i) data[i] = util::Swap32(data[i]);
#endif
+ blob.append(normalized.data(), normalized.size());
+
return blob;
}
if (blob.size() <= sizeof(trie_blob_size) ||
!string_util::DecodePOD<uint32>(
absl::string_view(blob.data(), sizeof(trie_blob_size)),
- &trie_blob_size) ||
- trie_blob_size >= blob.size()) {
+ &trie_blob_size)) {
return util::InternalError("Blob for normalization rule is broken.");
}
trie_blob_size = util::Swap32(trie_blob_size);
#endif
- if (trie_blob_size >= blob.size())
+ if (trie_blob_size >= blob.size()) {
return util::InternalError("Trie data size exceeds the input blob size.");
+ }
blob.remove_prefix(sizeof(trie_blob_size));
#ifdef IS_BIG_ENDIAN
+ CHECK_OR_RETURN(buffer);
buffer->assign(blob.data(), trie_blob_size);
uint32 *data = reinterpret_cast<uint32 *>(const_cast<char *>(buffer->data()));
- for (int i = 0; i < trie_blob_size / 4; ++i) data[i] = util::Swap32(data[i]);
+ for (int i = 0; i < buffer->size() / 4; ++i) data[i] = util::Swap32(data[i]);
*trie_blob = absl::string_view(buffer->data(), trie_blob_size);
#else
*trie_blob = absl::string_view(blob.data(), trie_blob_size);
TrainerResult res;
res.seed_pieces_and_probs = seed_pieces;
+ std::sort(pieces.begin(), pieces.end());
res.sentence_pieces = absl::StrJoin(pieces, " ");
return res;
}
// Check seed pieces.
EXPECT_EQ(27, res.seed_pieces_and_probs.size());
- LOG(INFO) << "[" << res.sentence_pieces << "]";
-
// Check final pieces.
- EXPECT_EQ("i a n y m l e apple ve O P r g t an v ▁ b A le ▁an p d h",
+ EXPECT_EQ("A O P a an apple b d e g h i l le m n p r t v ve y ▁ ▁an",
res.sentence_pieces);
}