void testLineBreaking();
void testWordBoundaries();
+ void testSentenceBoundaries();
void testGraphemeIteration();
void testWeak();
void testAsian();
void testJapanese();
void testChinese();
+ void testLegacyDictWordPrepostDash_de_DE();
+ void testLegacyDictWordPrepostDash_nds_DE();
+ void testLegacyDictWordPrepostDash_nl_NL();
+ void testLegacyDictWordPrepostDash_sv_SE();
+ void testLegacyHebrewQuoteInsideWord();
+ void testLegacySurrogatePairs();
+ void testLegacyWordCountCompat();
+
CPPUNIT_TEST_SUITE(TestBreakIterator);
CPPUNIT_TEST(testLineBreaking);
CPPUNIT_TEST(testWordBoundaries);
+ CPPUNIT_TEST(testSentenceBoundaries);
CPPUNIT_TEST(testGraphemeIteration);
CPPUNIT_TEST(testWeak);
CPPUNIT_TEST(testAsian);
#endif
CPPUNIT_TEST(testJapanese);
CPPUNIT_TEST(testChinese);
+ CPPUNIT_TEST(testLegacyDictWordPrepostDash_de_DE);
+ CPPUNIT_TEST(testLegacyDictWordPrepostDash_nds_DE);
+ CPPUNIT_TEST(testLegacyDictWordPrepostDash_nl_NL);
+ CPPUNIT_TEST(testLegacyDictWordPrepostDash_sv_SE);
+ CPPUNIT_TEST(testLegacyHebrewQuoteInsideWord);
+ CPPUNIT_TEST(testLegacySurrogatePairs);
+ CPPUNIT_TEST(testLegacyWordCountCompat);
CPPUNIT_TEST_SUITE_END();
private:
}
}
+ // i#22602: writer breaks word after dot immediately followed by a letter
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ //Here we want the line break to leave ./bar/baz clumped together on the next line
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "foo ./bar/baz", strlen("foo ./bar/ba"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the first period",
+ static_cast<sal_Int32>(4), aResult.breakIndex);
+ }
+ }
+
+ // i#81448: slash and backslash make non-breaking spaces of preceding spaces
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ // Per the bug, the line break should leave ...BE clumped together on the next line.
+ // However, the current behavior does not wrap the string at all. This test asserts the
+ // current behavior as a point of reference.
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "THIS... ...BE", strlen("THIS... ...B"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aResult.breakIndex);
+ }
+ }
+
+ // i#81448: slash and backslash make non-breaking spaces of preceding spaces
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ // The line break should leave /BE clumped together on the next line.
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "THIS... /BE", strlen("THIS... /B"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(8), aResult.breakIndex);
+ }
+ }
+
+ // i#80548: Bad word wrap between dash and word
+ {
+ aLocale.Language = "fi";
+ aLocale.Country = "FI";
+
+ {
+ // Per the bug, the line break should leave -bar clumped together on the next line.
+ // However, this change was reverted at some point. This test asserts the new behavior.
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "foo -bar", strlen("foo -ba"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the first dash",
+ static_cast<sal_Int32>(5), aResult.breakIndex);
+ }
+ }
+
+ // i#80645: Line erroneously breaks at backslash
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ // Here we want the line break to leave C:\Program Files\ on the first line
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "C:\\Program Files\\LibreOffice", strlen("C:\\Program Files\\Libre"), aLocale, 0,
+ aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(17), aResult.breakIndex);
+ }
+ }
+
+ // i#80841: Words separated by hyphens will always break to next line
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ // Here we want the line break to leave toll- on the first line
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "toll-free", strlen("toll-fr"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aResult.breakIndex);
+ }
+ }
+
+ // i#83464: Line break between letter and $
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ // Here we want the line break to leave US$ clumped on the next line.
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "word US$ 123", strlen("word U"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aResult.breakIndex);
+ }
+ }
+
+ // Unknown bug number: "fix line break problem of dot after letter and before number"
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ // Here we want the line break to leave US$ clumped on the next line.
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "word L.5 word", strlen("word L"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aResult.breakIndex);
+ }
+ }
+
+ // i#83229: Wrong line break when word contains a hyphen
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ {
+ // Here we want the line break to leave 100- clumped on the first line.
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ "word 100-199 word", strlen("word 100-1"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(9), aResult.breakIndex);
+ }
+ }
+
+ // i#83649: Line break should be between typographical quote and left bracket
+ {
+ aLocale.Language = "de";
+ aLocale.Country = "DE";
+
+ {
+ // Here we want the line break to leave »angetan werden« on the first line
+ const OUString str = u"»angetan werden« [Passiv]"_ustr;
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ str, strlen("Xangetan werdenX ["), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(17), aResult.breakIndex);
+ }
+ }
+
+ // i#72868: Writer/Impress line does not break after Chinese punctuation and Latin letters
+ {
+ aLocale.Language = "zh";
+ aLocale.Country = "HK";
+
+ {
+ // Per the bug, this should break at the ideographic comma. However, this change has
+ // been reverted at some point. This test only verifies current behavior.
+ const OUString str = u"word word、word word"_ustr;
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(
+ str, strlen("word wordXwor"), aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(13), aResult.breakIndex);
+ }
+ }
+
+ // i#80891: Character in the forbidden list sometimes appears at the start of line
+ {
+ aLocale.Language = "zh";
+ aLocale.Country = "HK";
+
+ {
+ // Per the bug, the ideographic two-dot leader should be a forbidden character. However,
+ // this change seems to have been reverted or broken at some point.
+ const OUString str = u"電話︰電話"_ustr;
+ i18n::LineBreakResults aResult
+ = m_xBreak->getLineBreak(str, 2, aLocale, 0, aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aResult.breakIndex);
+ }
+ }
+
//See https://bz.apache.org/ooo/show_bug.cgi?id=19716
{
aLocale.Language = "en";
CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break don't split the Korean word!", static_cast<sal_Int32>(5), aResult.breakIndex);
}
}
+
+ // i#65267: Comma is badly broken at end of line
+ // - The word should be wrapped along with the comma
+ {
+ aLocale.Language = "de";
+ aLocale.Country = "DE";
+
+ {
+ auto res = m_xBreak->getLineBreak("Wort -prinzessinnen, wort",
+ strlen("Wort -prinzessinnen,"), aLocale, 0,
+ aHyphOptions, aUserOptions);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32{ 6 }, res.breakIndex);
+ }
+ }
}
//See https://bugs.libreoffice.org/show_bug.cgi?id=49629
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.startPos);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
}
+
+ // i#55778: Words containing numbers get broken up
+ {
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ static constexpr OUString aTest = u"first i18n third"_ustr;
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.endPos);
+ }
+
+ // i#56347: "BreakIterator patch for Hungarian"
+ // Rules for Hungarian affixes after numbers and certain symbols
+ {
+ auto mode = i18n::WordType::DICTIONARY_WORD;
+ aLocale.Language = "hu";
+ aLocale.Country = "HU";
+
+ OUString aTest = u"szavak 15 15-tel 15%-kal €-val szavak"_ustr;
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 2, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 11, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 18, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(17), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(24), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 25, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 27, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 34, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.endPos);
+ }
+
+ // i#56348: Special chars in first pos not handled by spell checking in Writer (Hungarian)
+ // Rules for Hungarian affixes after numbers and certain symbols in edit mode.
+ // The patch was merged, but the original bug was never closed and the current behavior seems
+ // identical to the ICU default behavior. Added this test to ensure that doesn't change.
+ {
+ auto mode = i18n::WordType::ANY_WORD;
+ aLocale.Language = "hu";
+ aLocale.Country = "HU";
+
+ OUString aTest = u"szavak 15 15-tel 15%-kal €-val szavak"_ustr;
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 2, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 11, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 11, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(13), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 13, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(13), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(17), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 17, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(17), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 19, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(20), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 20, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(20), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(21), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 21, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(21), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(24), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 24, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(24), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 25, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(26), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 26, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(26), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(27), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 27, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(27), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 30, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.endPos);
+
+ aBounds = m_xBreak->getWordBoundary(aTest, 31, aLocale, mode, true);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.endPos);
+ }
+}
+
+void TestBreakIterator::testSentenceBoundaries()
+{
+ lang::Locale aLocale;
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ // Trivial characteristic test for sentence boundary detection
+ {
+ OUString aTest("This is a sentence. This is a different sentence.");
+
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), m_xBreak->beginOfSentence(aTest, 5, aLocale));
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(19), m_xBreak->endOfSentence(aTest, 5, aLocale));
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(20), m_xBreak->beginOfSentence(aTest, 31, aLocale));
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(49), m_xBreak->endOfSentence(aTest, 31, aLocale));
+ }
+
+ // i#24098: i18n API beginOfSentence/endOfSentence
+ // fix beginOfSentence, ... when cursor is on the beginning of the sentence
+ {
+ OUString aTest("This is a sentence. This is a different sentence.");
+
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(20), m_xBreak->beginOfSentence(aTest, 20, aLocale));
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(49), m_xBreak->endOfSentence(aTest, 20, aLocale));
+ }
+
+ // i#24098: i18n API beginOfSentence/endOfSentence
+ // "skip preceding space for beginOfSentence"
+ {
+ OUString aTest("This is a sentence. This is a different sentence.");
+
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), m_xBreak->beginOfSentence(aTest, 20, aLocale));
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(19), m_xBreak->endOfSentence(aTest, 20, aLocale));
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(24), m_xBreak->beginOfSentence(aTest, 26, aLocale));
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(53), m_xBreak->endOfSentence(aTest, 26, aLocale));
+ }
}
//See https://bugs.libreoffice.org/show_bug.cgi?id=40292
CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos);
}
}
+
+void TestBreakIterator::testLegacyDictWordPrepostDash_de_DE()
+{
+ lang::Locale aLocale;
+ aLocale.Language = "de";
+ aLocale.Country = "DE";
+
+ {
+ auto aTest = u"Arbeits- -nehmer"_ustr;
+
+ i18n::Boundary aBounds
+ = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos);
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos);
+ }
+}
+
+void TestBreakIterator::testLegacyDictWordPrepostDash_nds_DE()
+{
+ lang::Locale aLocale;
+ aLocale.Language = "nds";
+ aLocale.Country = "DE";
+
+ {
+ auto aTest = u"Arbeits- -nehmer"_ustr;
+
+ i18n::Boundary aBounds
+ = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos);
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos);
+ }
+}
+
+void TestBreakIterator::testLegacyDictWordPrepostDash_nl_NL()
+{
+ lang::Locale aLocale;
+ aLocale.Language = "nl";
+ aLocale.Country = "NL";
+
+ {
+ auto aTest = u"Arbeits- -nehmer"_ustr;
+
+ i18n::Boundary aBounds
+ = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos);
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos);
+ }
+}
+
+void TestBreakIterator::testLegacyDictWordPrepostDash_sv_SE()
+{
+ lang::Locale aLocale;
+ aLocale.Language = "sv";
+ aLocale.Country = "SE";
+
+ {
+ auto aTest = u"Arbeits- -nehmer"_ustr;
+
+ i18n::Boundary aBounds
+ = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos);
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos);
+ }
+}
+
+void TestBreakIterator::testLegacyHebrewQuoteInsideWord()
+{
+ lang::Locale aLocale;
+
+ aLocale.Language = "he";
+ aLocale.Country = "IL";
+
+ {
+ auto aTest = u"פַּרְדּ״ס פַּרְדּ\"ס"_ustr;
+
+ i18n::Boundary aBounds
+ = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.endPos);
+ }
+}
+
+void TestBreakIterator::testLegacySurrogatePairs()
+{
+ lang::Locale aLocale;
+
+ aLocale.Language = "ja";
+ aLocale.Country = "JP";
+
+ // i#75632: [surrogate pair] Japanese word break does not work properly for surrogate pairs.
+ // and many others to address bugs: i#75631 i#75633 i#75412 etc.
+ //
+ // BreakIterator supports surrogate pairs (UTF-16). This is a simple characteristic test.
+ {
+ const sal_Unicode buf[] = { u"X 𠮟 X" };
+ OUString aTest(buf, SAL_N_ELEMENTS(buf));
+
+ auto aBounds
+ = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aBounds.endPos);
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 2, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
+
+ aBounds
+ = m_xBreak->getWordBoundary(aTest, 5, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.startPos);
+ CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos);
+ }
+}
+
+void TestBreakIterator::testLegacyWordCountCompat()
+{
+ lang::Locale aLocale;
+
+ aLocale.Language = "en";
+ aLocale.Country = "US";
+
+ // i#80815: "Word count differs from MS Word"
+ // This is a characteristic test for word count using test data from the linked bug.
+ {
+ const OUString str = u""
+ "test data for word count issue #80815\n"
+ "fo\\\'sforos\n"
+ "archipi\\\'elago\n"
+ "do\\^me\n"
+ "f**k\n"
+ "\n"
+ "battery-driven\n"
+ "and/or\n"
+ "apple(s)\n"
+ "money+opportunity\n"
+ "Micro$oft\n"
+ "\n"
+ "300$\n"
+ "I(not you)\n"
+ "a****n\n"
+ "1+3=4\n"
+ "\n"
+ "aaaaaaa.aaaaaaa\n"
+ "aaaaaaa,aaaaaaa\n"
+ "aaaaaaa;aaaaaaa\n"_ustr;
+
+ int num_words = 0;
+ sal_Int32 next_pos = 0;
+ int iter_guard = 0;
+ while (true)
+ {
+ CPPUNIT_ASSERT_MESSAGE("Tripped infinite loop check", ++iter_guard < 100);
+
+ auto aBounds = m_xBreak->nextWord(str, next_pos, aLocale, i18n::WordType::WORD_COUNT);
+
+ if (aBounds.endPos < next_pos)
+ {
+ break;
+ }
+
+ next_pos = aBounds.endPos;
+ ++num_words;
+ }
+
+ CPPUNIT_ASSERT_EQUAL(23, num_words);
+ }
+}
+
void TestBreakIterator::setUp()
{
BootstrapFixtureBase::setUp();
sync. It unclear which diffs from the base versions are deliberate and which
are now accidental :-(
-We need to review the various issues referenced in the commits that caused
-customizations and see if they're still relevant or not, write regression tests
-for them, if any are still relevant then apply the changes back on top of the
-latest versions.
+The various issues and customizations have been reviewed, with tests written for
+customizations that are still relevant. However, these files are still extremely
+out-of-date and need to be refreshed. Relevant customizations should be reapplied
+on top of a current version.
-to-review, later are ok:
-
-commit e1ad946ef5db3f7c0a540207d0f0fd85799e3b66
-Author: Release Engineers <releng@openoffice.org>
-Date: Thu Aug 6 18:13:57 2009 +0000
-
- CWS-TOOLING: integrate CWS tl73
- 2009-07-31 15:29:33 +0200 tl r274535 : #i64400# dash/hyphen should not break words
-
-commit 9964a76ef58786bba47d409970512d7ded6c8889
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Wed Jul 2 07:53:05 2008 +0000
-
- INTEGRATION: CWS i18n41 (1.1.2); FILE ADDED
- 2008/04/25 17:06:26 khong 1.1.2.3: i55063, make period a sentence delimiter
- 2008/04/25 06:40:50 khong 1.1.2.2: i55063, make space as Thai sentence delimiter
- 2008/04/24 03:19:10 khong 1.1.2.1: i55063, set Thai letters as sentence delimiter for Thai and English mixed text
-
-commit e4a6e4284dae1ca6fbfa7d1e43690dbf87d796cd
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Wed Jul 2 07:52:44 2008 +0000
-
- INTEGRATION: CWS i18n41 (1.9.12); FILE MERGED
- 2008/06/17 20:22:30 khong 1.9.12.2: i83229 fix the problem of leading hyphen for numbers
- 2008/04/23 06:20:16 khong 1.9.12.1: i72868, i80891, i83229, fix Chinese punctuations and hyphen for line breakiterator
-
-commit 55dff22611659a1567c968fbf9e512a2765ab62e
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Wed Jul 2 07:52:07 2008 +0000
-
- INTEGRATION: CWS i18n41 (1.33.36); FILE MERGED
- 2008/06/05 22:18:29 khong 1.33.36.2: RESYNC: (1.33-1.35); FILE MERGED
- 2008/04/23 06:11:55 khong 1.33.36.1: i55063, enable language specific sentence breakiterator
-
-commit 1c2b8095631a3c2d2f396bf50a8f0c62f49be65c
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Wed Jul 2 07:51:12 2008 +0000
-
- INTEGRATION: CWS i18n41 (1.12.140); FILE MERGED
- 2008/06/05 22:18:26 khong 1.12.140.2: RESYNC: (1.12-1.13); FILE MERGED
- 2008/04/23 06:04:53 khong 1.12.140.1: i87530 avoid breaking line before un-completed cell
-
-commit 9bbdb52df370c69c0f7eba387a2068ee80bd7994
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Wed Jul 2 07:50:43 2008 +0000
-
- INTEGRATION: CWS i18n41 (1.25.2); FILE MERGED
- 2008/06/05 22:18:23 khong 1.25.2.2: RESYNC: (1.25-1.26); FILE MERGED
- 2008/04/23 06:09:02 khong 1.25.2.1: i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
-
-commit 8dcdd3ca268f78295731b86797c2b8cd447ba667
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Tue May 20 13:36:01 2008 +0000
-
- INTEGRATION: CWS i18n43_DEV300 (1.33.38); FILE MERGED
- 2008/04/29 21:51:51 khong 1.33.38.1: #i88411# apply the patch from Coleman Kane to fix icu setBreakType issue
-
-commit bedef98c24ef9ada6aaffe9bc5284d9759a31a9a
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Wed Apr 2 08:49:09 2008 +0000
-
- INTEGRATION: CWS i18n40 (1.2.314); FILE MERGED
- 2008/03/19 06:30:23 khong 1.2.314.2: #i80815# count dash like MS Word
- 2008/03/15 07:32:44 khong 1.2.314.1: #i80815# count punctuation as word
-
-commit 59144104b3f91a2e6ed816f0bde0fdb91ea218d7
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Wed Apr 2 08:48:53 2008 +0000
-
- INTEGRATION: CWS i18n40 (1.24.44); FILE MERGED
- 2008/03/19 18:56:42 khong 1.24.44.2: i80815 make word count feature like MS Word
- 2008/03/15 07:31:38 khong 1.24.44.1: #i80815# count punctuation as word
-
-commit 3f0b51776602c45e8aca991450fcbb30f2484ae5
-Author: Vladimir Glazounov <vg@openoffice.org>
-Date: Mon Jan 28 14:33:46 2008 +0000
-
- INTEGRATION: CWS i18n39 (1.8.4); FILE MERGED
- 2007/12/12 17:45:45 khong 1.8.4.3: b6634800# fix line break problem of dot after letter and before number
- 2007/12/08 01:05:52 khong 1.8.4.2: #i83649# fixed the problem of line break between quotation mark and open bracket
- 2007/12/07 23:44:30 khong 1.8.4.1: #i83464# fix the problem of line break between letter and 1326
-
-commit 5d8ef209b1f63d1c8ea5014bdbef96660b355423
-Author: Vladimir Glazounov <vg@openoffice.org>
-Date: Tue Oct 23 08:09:00 2007 +0000
-
- INTEGRATION: CWS i18n38 (1.7.4); FILE MERGED
- 2007/09/19 00:08:04 khong 1.7.4.3: i81448 fixed dot line break issue
- 2007/09/10 23:57:12 khong 1.7.4.2: i81440 fix the problem of line break on punctuations
- 2007/09/10 22:55:46 khong 1.7.4.1: i81448 fix problem of line break on symbols
-
-commit a2f3b48cacfcef338ca5e37acde34c83876e082e
-Author: Vladimir Glazounov <vg@openoffice.org>
-Date: Tue Oct 23 08:08:47 2007 +0000
-
- INTEGRATION: CWS i18n38 (1.32.10); FILE MERGED
- 2007/09/18 20:32:39 khong 1.32.10.1: i81519 set break type icu breakiterator
-
-commit 1967d8fb182b3101dee4f715e78be384400bc1e8
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Wed Sep 5 16:37:28 2007 +0000
-
- INTEGRATION: CWS i18n37 (1.22.6); FILE MERGED
- 2007/09/03 18:27:39 khong 1.22.6.2: i8132 fixed a problem in skipping space for word breakiterator
- 2007/08/31 21:30:30 khong 1.22.6.1: i81158 fix skipping space problem
-
-commit d2c2baf1a31d281d20e8b4d4c806dda027b2d5a3
-Author: Vladimir Glazounov <vg@openoffice.org>
-Date: Tue Aug 28 11:46:45 2007 +0000
-
- INTEGRATION: CWS i18n36_SRC680 (1.5.20.1.2); FILE MERGED
- 2007/08/22 17:12:36 khong 1.5.20.1.2.1: i80841 fix hyphen line break problem
-
-commit d56bedfb425cf77f176f143455e4a9fb6ce65540
-Author: Vladimir Glazounov <vg@openoffice.org>
-Date: Tue Aug 28 11:46:34 2007 +0000
-
- INTEGRATION: CWS i18n36_SRC680 (1.21.2.1.2); FILE MERGED
- 2007/08/22 20:02:28 khong 1.21.2.1.2.2: i80923 fix infinite loop problem
- 2007/08/22 17:11:44 khong 1.21.2.1.2.1: i80923 fix a infinite loop
-
-commit 8a36b196925a5561eabde0a0ef293c73fcb5add3
-Author: Ivo Hinkelmann <ihi@openoffice.org>
-Date: Fri Aug 17 13:58:48 2007 +0000
-
- INTEGRATION: CWS i18n34 (1.5.22); FILE MERGED
- 2007/08/13 22:26:12 khong 1.5.22.1: i80548 i80645 fix dash and backslash issues in line breakiterator
-
-commit c00b2b49bad765144f90552139e63d87d520d1cf
-Author: Ivo Hinkelmann <ihi@openoffice.org>
-Date: Fri Aug 17 13:58:36 2007 +0000
-
- INTEGRATION: CWS i18n34 (1.15.4); FILE MERGED
- 2007/08/13 22:33:38 khong 1.15.4.1: i86439 fix surrogate characters handling issues
-
-commit 3fc5fbc71d4c244d7c8002aa530481741e585bd4
-Author: Ivo Hinkelmann <ihi@openoffice.org>
-Date: Fri Aug 17 13:58:23 2007 +0000
-
- INTEGRATION: CWS i18n34 (1.31.4); FILE MERGED
- 2007/08/13 22:33:37 khong 1.31.4.1: i86439 fix surrogate characters handling issues
-
-commit ee44b43881e7c82c379931f111c452a477b73341
-Author: Ivo Hinkelmann <ihi@openoffice.org>
-Date: Fri Aug 17 13:58:11 2007 +0000
-
- INTEGRATION: CWS i18n34 (1.21.4); FILE MERGED
- 2007/08/14 08:38:53 khong 1.21.4.2: i86439 fix surrogate characters handling issues
- 2007/08/13 22:33:37 khong 1.21.4.1: i86439 fix surrogate characters handling issues
-
-commit f47369dbbc385f8968ad43e43cba293a29a4c2df
-Author: Jens-Heiner Rechtien <hr@openoffice.org>
-Date: Tue Jul 31 16:09:13 2007 +0000
-
- INTEGRATION: CWS i18n32 (1.29.14); FILE MERGED
- 2007/07/24 20:39:44 khong 1.29.14.1: #i79148# fix a local word breakiterator rules loading issue
-
-commit 2791553b4e3fc5e04b96d0b2fd119d9fba1946bc
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Thu Jul 26 08:08:51 2007 +0000
-
- INTEGRATION: CWS i18n31 (1.14.60); FILE MERGED
- 2007/07/16 22:18:44 khong 1.14.60.4: i75631 i75632 i75633 i75412 handle surrogate pair characters
- 2007/07/13 20:37:32 khong 1.14.60.3: #i75632# use ICU characters properties
- 2007/07/04 01:17:22 khong 1.14.60.2: i75631 i75632 i75633 i75412 handle surrogate pair characters
- 2007/06/27 04:33:11 khong 1.14.60.1: i75631 i75632 i75633 i75412 handle surrogate pair characters
-
-commit 1c79a2bf1e89ac4eb409922ab7eb8ad3cacc688a
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Thu Jul 26 08:08:39 2007 +0000
-
- INTEGRATION: CWS i18n31 (1.8.60); FILE MERGED
- 2007/06/27 04:33:11 khong 1.8.60.1: i75631 i75632 i75633 i75412 handle surrogate pair characters
-
-commit 517bbaddbaf81a5a6bb00979944cad13a1575d50
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Thu Jul 26 08:08:27 2007 +0000
-
- INTEGRATION: CWS i18n31 (1.28.14); FILE MERGED
- 2007/07/13 20:37:32 khong 1.28.14.5: #i75632# use ICU characters properties
- 2007/07/04 01:17:22 khong 1.28.14.4: i75631 i75632 i75633 i75412 handle surrogate pair characters
- 2007/06/27 23:25:58 khong 1.28.14.3: i75412 handle surrogate pair characters
- 2007/06/27 05:33:20 khong 1.28.14.2: RESYNC: (1.28-1.29); FILE MERGED
- 2007/06/27 04:33:11 khong 1.28.14.1: i75631 i75632 i75633 i75412 handle surrogate pair characters
-
-commit 0154e3492f2527535c0d648274e7ff674674318b
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Thu Jul 26 08:08:14 2007 +0000
-
- INTEGRATION: CWS i18n31 (1.14.42); FILE MERGED
- 2007/06/27 05:33:03 khong 1.14.42.2: RESYNC: (1.14-1.15); FILE MERGED
- 2007/06/27 04:33:11 khong 1.14.42.1: i75631 i75632 i75633 i75412 handle surrogate pair characters
-
-commit e2a5a2532ee187669980adb7bfa747c7803c330a
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Thu Jul 26 08:08:02 2007 +0000
-
- INTEGRATION: CWS i18n31 (1.19.60); FILE MERGED
- 2007/07/13 20:37:32 khong 1.19.60.4: #i75632# use ICU characters properties
- 2007/07/04 01:17:22 khong 1.19.60.3: i75631 i75632 i75633 i75412 handle surrogate pair characters
- 2007/06/27 05:00:48 khong 1.19.60.2: i75231 handle surrogate pair characters
- 2007/06/27 04:33:11 khong 1.19.60.1: i75631 i75632 i75633 i75412 handle surrogate pair characters
-
-commit 80a26a7d4720b5b8cfa0acc624b28014c96d9948
-Author: Jens-Heiner Rechtien <hr@openoffice.org>
-Date: Tue Jun 26 16:41:02 2007 +0000
-
- INTEGRATION: CWS ause081 (1.2.332); FILE MERGED
- 2007/06/21 10:53:19 hjs 1.2.332.1: #i78393# remove component_getDescriptionFunc from exports
-
-commit c2801db6b04bf6f0dbb07727c91b2c66e7e027b8
-Author: Ivo Hinkelmann <ihi@openoffice.org>
-Date: Wed Jun 6 11:17:38 2007 +0000
-
- INTEGRATION: CWS i18n30 (1.4.24); FILE MERGED
- 2007/05/08 21:32:18 khong 1.4.24.1: #i73903# update line breakiterator rule to icu3.6 style
-
-commit ea290668f78475c3b277c9e44bf5622ccb4dcec8
-Author: Ivo Hinkelmann <ihi@openoffice.org>
-Date: Wed Jun 6 11:17:25 2007 +0000
-
- INTEGRATION: CWS i18n30 (1.28.4); FILE MERGED
- 2007/05/08 21:47:00 khong 1.28.4.3: #i75412# remove fix from cws i18n30, move it to other cws to fix with other Japanese surrogate issues
- 2007/03/20 18:39:58 khong 1.28.4.2: #i72589# fixed BS problem for surrogate characters
- 2007/03/13 19:11:44 khong 1.28.4.1: #i75319# fixed ANY_WORD rule loading problem
-
-commit b6308a6e322fd4eaa7845793beb70900624f351c
-Author: Ivo Hinkelmann <ihi@openoffice.org>
-Date: Wed Jun 6 11:17:12 2007 +0000
-
- INTEGRATION: CWS i18n30 (1.14.32); FILE MERGED
- 2007/05/08 21:44:15 khong 1.14.32.1: #i76706# fix infinite loop for CJK word breakiterator for text mixed with Latin and CJK characters
-
-commit e068e0e9aa9405ea4016ad19e9a963129adfed79
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Thu Jan 25 08:35:42 2007 +0000
-
- INTEGRATION: CWS i18n28 (1.1.2); FILE ADDED
- 2006/12/06 05:52:39 khong 1.1.2.1: #i64400# add an optional breakiterator entry in localedata
-
-commit 8d6f35a46085bb420e8896505504b376d17b842a
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Thu Jan 25 08:35:31 2007 +0000
-
- INTEGRATION: CWS i18n28 (1.24.36); FILE MERGED
- 2006/12/19 17:27:58 khong 1.24.36.2: RESYNC: (1.24-1.25); FILE MERGED
- 2006/12/06 05:52:38 khong 1.24.36.1: #i64400# add an optional breakiterator entry in localedata
-
-commit 633d34fa33330339ab6795ce3703477216e0062e
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Tue Dec 12 15:14:36 2006 +0000
-
- INTEGRATION: CWS icuupgrade (1.9.24); FILE MERGED
- 2006/10/11 06:11:11 khong 1.9.24.4: RESYNC: (1.10-1.11); FILE MERGED
- 2006/07/07 10:57:40 hdu 1.9.24.3: RESYNC: (1.9-1.10); FILE MERGED
- 2006/06/30 01:31:40 khong 1.9.24.2: #i53388# upgrade icu to 3.4.1
- 2006/06/15 19:16:55 khong 1.9.24.1: #i60645# upgrade icu to 3.4.1
-
-commit 5d46dabe95271c846601a2575d3304fd5b4b24f1
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Tue Dec 12 15:14:05 2006 +0000
-
- INTEGRATION: CWS icuupgrade (1.22.20); FILE MERGED
- 2006/11/11 07:12:47 khong 1.22.20.6: #142664# fix breakiterator crash problem
- 2006/10/11 06:10:51 khong 1.22.20.5: RESYNC: (1.23-1.24); FILE MERGED
- 2006/09/06 01:00:31 khong 1.22.20.4: #i60645# upgrade to icu 3.6
- 2006/07/07 10:57:32 hdu 1.22.20.3: RESYNC: (1.22-1.23); FILE MERGED
- 2006/06/30 01:31:40 khong 1.22.20.2: #i53388# upgrade icu to 3.4.1
- 2006/06/20 14:27:26 hdu 1.22.20.1: #i60645# fix crash when udata_open failed
-
-commit 7431d816cdfc47b08978c0afd1f6503644bb11b8
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Mon Nov 6 13:40:05 2006 +0000
-
- INTEGRATION: CWS i18n27 (1.3.142); FILE MERGED
- 2006/10/10 21:10:57 khong 1.3.142.1: #i65267# fix line break rule
-
-commit d7471e1462ffd9baeb3449eb86ccbb649e32b233
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Mon Nov 6 13:39:52 2006 +0000
-
- INTEGRATION: CWS i18n27 (1.1.2); FILE ADDED
- 2006/10/10 21:08:55 khong 1.1.2.1: #i56348# add Hungarian word break rule for edit mode
-
-commit 1b65b0b886e2cb16382bc11770230fb6a140f33b
-Author: Jens-Heiner Rechtien <hr@openoffice.org>
-Date: Tue Oct 24 12:53:13 2006 +0000
-
- INTEGRATION: CWS tl29 (1.12.24); FILE MERGED
- 2006/09/20 01:24:53 khong 1.12.24.1: #i69482# fixed mismatch of nextWord and getWordBoundary
-
-commit 97d89862a2285071202cc8010d888ffcbf96279a
-Author: Jens-Heiner Rechtien <hr@openoffice.org>
-Date: Thu Nov 17 19:30:35 2005 +0000
-
- INTEGRATION: CWS i18n23 (1.20.22); FILE MERGED
- 2005/11/17 20:00:37 khong 1.20.22.3: RESYNC: (1.20-1.21); FILE MERGED
- 2005/11/17 19:45:05 khong 1.20.22.2: #i57866# merge cws i18n23 and thaiissues
- 2005/11/15 21:10:24 khong 1.20.22.1: #i57866# fix line breakiterator problem
-
-commit 05fadde6f025bcaafca4f3093e88be3cc1bb6836
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Wed Nov 16 09:18:37 2005 +0000
-
- INTEGRATION: CWS thaiissues (1.20.6); FILE MERGED
- 2005/10/26 20:42:40 khong 1.20.6.2: use icu thai linke break algorithm for thai breakiterator
- 2005/10/26 13:36:24 fme 1.20.6.1: #i55716# Handling of WORDJOINER
-
-commit a10b0e70c641d7438c557ef718c6942b3abffaec
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Wed Nov 16 09:18:25 2005 +0000
-
- INTEGRATION: CWS thaiissues (1.8.6); FILE MERGED
- 2005/10/26 20:42:39 khong 1.8.6.1: use icu thai linke break algorithm for thai breakiterator
-
-commit 4a1f1586173839d532f90507c72306bc9e2aec56
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Wed Nov 16 09:18:11 2005 +0000
-
- INTEGRATION: CWS thaiissues (1.9.4); FILE MERGED
- 2005/10/28 17:54:39 khong 1.9.4.1: Fix a bug in ctl line break when there is word joiner character
-
-commit beb2a536738ba761a92f8266570f1859c85f94ae
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Tue Nov 8 15:59:16 2005 +0000
-
- INTEGRATION: CWS siloch (1.3.50); FILE MERGED
- 2005/10/26 10:55:05 er 1.3.50.1: #i56347# apply patch to recognize suffixes of numbers in Hungarian spellchecking; contributed by Nemeth Laszlo <nemeth@ooo>
-
-commit 939e7c2bc93c13b6740051beeb08c5883b65ffce
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Nov 4 14:33:30 2005 +0000
-
- INTEGRATION: CWS i18n21 (1.3.46); FILE MERGED
- 2005/10/21 00:35:09 khong 1.3.46.1: #i55778 reverse back last change, treat letter and number combination as one word.
-
-commit 51594ef552a872b9868e5c7a025a68665488a016
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Nov 4 14:33:16 2005 +0000
-
- INTEGRATION: CWS i18n21 (1.2.2); FILE MERGED
- 2005/10/21 00:35:08 khong 1.2.2.1: #i55778 reverse back last change, treat letter and number combination as one word.
-
-commit f4fe39909c7ed645a8b387cf66de249572226ad6
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Nov 4 14:33:03 2005 +0000
-
- INTEGRATION: CWS i18n21 (1.3.46); FILE MERGED
- 2005/10/21 00:35:08 khong 1.3.46.1: #i55778 reverse back last change, treat letter and number combination as one word.
-
-commit 7f8af14611e66655ea7354083eafd71afc9703e3
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Nov 4 14:32:41 2005 +0000
-
- INTEGRATION: CWS i18n21 (1.4.46); FILE MERGED
- 2005/10/21 00:35:07 khong 1.4.46.1: #i55778 reverse back last change, treat letter and number combination as one word.
-
-commit 924e158b9d871fbf7500e9215540e26aa95b3b20
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Mon Oct 17 14:43:17 2005 +0000
-
- INTEGRATION: CWS i18n20 (1.1.2); FILE ADDED
- 2005/09/22 23:47:49 khong 1.1.2.1: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule.
-
-commit a428a8927006a10ccfe7182e6fe5a8b677281eca
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Mon Oct 17 14:42:30 2005 +0000
-
- INTEGRATION: CWS i18n20 (1.18.32); FILE MERGED
- 2005/09/23 15:59:13 khong 1.18.32.6: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule.
- 2005/09/23 08:09:54 khong 1.18.32.5: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule.
- 2005/09/23 07:38:03 khong 1.18.32.4: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule
- 2005/09/22 23:47:48 khong 1.18.32.3: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule.
- 2005/08/26 23:34:37 khong 1.18.32.2: #i50172# add cell breakiterator rule for Tamil
- 2005/08/26 23:31:59 khong 1.18.32.1: #i50172# add cell breakiterator rule for Tamil
-
-commit f518f78557931b81e06fd7b31bb22c6639e5e553
-Author: Rüdiger Timm <rt@openoffice.org>
-Date: Mon Oct 17 14:42:14 2005 +0000
-
- INTEGRATION: CWS i18n20 (1.6.32); FILE MERGED
- 2005/09/23 15:59:13 khong 1.6.32.3: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule.
- 2005/09/23 07:38:02 khong 1.6.32.2: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule
- 2005/09/22 23:47:48 khong 1.6.32.1: #i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule.
-
-commit 9b870055ecd043d1d4fadeacd351f8739e1979a0
-Author: Vladimir Glazounov <vg@openoffice.org>
-Date: Fri Feb 25 09:08:13 2005 +0000
-
- INTEGRATION: CWS i18n16 (1.16.22); FILE MERGED
- 2005/02/04 19:05:45 khong 1.16.22.3: #i41671# use ICU rules for Thai breakiterator
- 2005/01/24 21:56:34 khong 1.16.22.2: #i35285# merge cws i18n16 with top version 1.17
- 2005/01/12 01:12:41 khong 1.16.22.1: #i35285# remove uprv_malloc, use udata_open for loading icu rule breakiterator
-
-commit 29b9e86f5dac388d7aaced24d3826ac9331b03e3
-Author: Vladimir Glazounov <vg@openoffice.org>
-Date: Fri Feb 25 09:07:59 2005 +0000
+done, regression tests added:
- INTEGRATION: CWS i18n16 (1.5.22); FILE MERGED
- 2005/02/04 19:05:45 khong 1.5.22.1: #i41671# use ICU rules for Thai breakiterator
+#112623# update Japanese word breakiterator dictionary
+#i50172# add cell breakiterator rule for Tamil
+#i80412# indic cursoring
+#i107843# em-dash/en-dash breakiterator fix for spell checking
+#i103552# Japanese word for 'shutdown' added to ja.dic
+#i113785# ligatures for spell checking will no longer break words
+An opening quote should not be counted as a word by word count tool (regression test in writer)
+fdo#31271 wrong line break with (
+#i89042# word count fix (regression test is in writer)
+#i58513# add break iterator rules for Finish
+#i19716# fix wrong line break on bracket characters
+#i21290# extend Greek script type
+#i21907# fix isBeginWord and isEndWord problem
+#i85411# Apply patch for ZWSP
+#i17155# fix line breakiterator rule to make slash and hyphen as part of word when doing line break
+#i13451# add '-' as midLetter for Catalan dictionary word breakiterator
+#i13494# fix word breakiterator rule to handle punctuations and signs correctly
+#i29548# Fix Thai word breakiterator problem
+#i11993# #i14904# fix word breakiterator issues
+#i64400# dash/hyphen should not break words (de/nds/nl/sv)
+#i22602# make dot stick on beginning of a word when doing line break
+#i24098# skip preceding space for beginOfSentence
+#i24098# fix beginOfSentence, which did not work correctly when cursor is on the beginning of the sentence
+#i51661# add quotation mark as middle letter for Hebrew in word breakiterator rule.
+#i50172# add cell breakiterator rule for Tamil
+#i55778# reverse back last change, treat letter and number combination as one word.
+#i56347# apply patch to recognize suffixes of numbers in Hungarian spellchecking
+#i56348# add Hungarian word break rule for edit mode
+#i65267# fix line break rule
+#i86439# many changes to implement, tweak, debug UTF-16 surrogate pair handling
+#i75631# "
+#i75632# "
+#i75633# "
+#i75412# "
+#i80645# fix backslash issues in line breakiterator
+#i80841# fix hyphen line break problem
+#i81448# fixed dot line break issue
+#i81448# fix the problem of line break on punctuations (commit message says i81440)
+#i81448# fix problem of line break on symbols
+#i83649# fixed the problem of line break between quotation mark and open bracket
+#i83464# fix the problem of line break between letter and 1326
+b6634800# fix line break problem of dot after letter and before number
+#i83229# fix the problem of leading hyphen for numbers
+#i80815# count words like MS Word
+
+likely superseded:
+
+#i21392# Obscure line break behavior mismatch in string of symbols between MSO and LO.
+#i80548# "fix dash issues in line breakiterator" - fix no longer works
+#i72868# "fix Chinese punctuation for line breakiterator" - fix no longer works
+#i80891# "fix Chinese punctuation for line breakiterator" - fix no longer works
+
+#i27711# Adding/tweaking/removing languages later added to ICU.
+#i33756# "
+#i41671# "
+#i41671# "
+#i55063# "
+#i24850# ICU upgrades, internal bug fixes, or other work-arounds.
+#i24098# "
+#112772# "
+#i35285# "
+4a1f1586173839d532f90507c72306bc9e2aec56 "
+a10b0e70c641d7438c557ef718c6942b3abffaec "
+05fadde6f025bcaafca4f3093e88be3cc1bb6836 "
+#i57866# "
+#i57866# "
+#i69482# "
+#142664# "
+#i60645# "
+#i53388# "
+#i60645# "
+#i78393# "
+#i73903# "
+#i75412# "
+#i72589# "
+#i75319# "
+#i76706# "
+#i64400# "
+#i64400# "
+#i79148# "
+#i55063# "
+#i87530# "
+#i88041# "
+#i88411# "
+#i80923# "
+#i80923# "
+#i81519# "
+
+
+suspect:
+
+
+- The intentions behind the following commits are unclear, as the referenced bugs were in the
+StarOffice internal bug tracker. These changes are contemporaneous with TR14 Revision 17, and seem
+to be part of an effort to backport upstream rule changes across multiple language customizations.
commit 746ea3d8c29b27b23af3433446f66db0ad3096d6
Author: Oliver Bolte <obo@openoffice.org>
INTEGRATION: CWS i18n15 (1.3.36); FILE MERGED
2004/09/04 02:03:53 khong 1.3.36.1: #117685# make dictionary word contain only letter or only number, dot can be in middle or end of a word, but only one.
-commit e5a62ce85bebcc9fb2bf0e5b9aced5fc7748055b
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Tue Jan 11 10:18:37 2005 +0000
-
- INTEGRATION: CWS i18n15 (1.16.4); FILE MERGED
- 2004/10/07 18:19:11 khong 1.16.4.1: #i33756# update Hungarian breakiterator
-
-commit d2a6a31e6981800c2a920f8c6ff901c341a0466e
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Jul 30 13:38:57 2004 +0000
-
- INTEGRATION: CWS i18n13 (1.8.92); FILE MERGED
- 2004/06/14 23:24:16 khong 1.8.92.2: #112772# Japanese word breakiterator is not correct
- 2004/06/11 19:23:04 khong 1.8.92.1: #112772# Japanese word breakiterator is not correct
-commit d6b8dabc3dc4811e1152d411a8428ccb334d16ab
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Jul 30 13:38:17 2004 +0000
-
- INTEGRATION: CWS i18n13 (1.7.162); FILE MERGED
- 2004/06/11 19:23:04 khong 1.7.162.1: #112772# Japanese word breakiterator is not correct
-
-commit 9ea4c16a699ac7cf5e255a19653651ac993f022b
-Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Jul 30 13:38:05 2004 +0000
-
- INTEGRATION: CWS i18n13 (1.9.92); FILE MERGED
- 2004/06/11 19:23:04 khong 1.9.92.1: #112772# Japanese word breakiterator is not correct
+- The intention behind the following commit is unclear, as the bug references are incorrect and no
+good candidates were immediately apparent. Based on the text of the commit, however, it appears to
+be a simple bug fix for skipSpace(). This function has also had a great deal of churn since this
+commit, further suggesting it is no longer pertinent.
-commit 2887ecb5554eee699e1dce4ffbc2dfcf71a54a41
+commit 1967d8fb182b3101dee4f715e78be384400bc1e8
Author: Kurt Zenker <kz@openoffice.org>
-Date: Fri Jul 30 13:37:54 2004 +0000
-
- INTEGRATION: CWS i18n13 (1.15.18); FILE MERGED
- 2004/06/17 20:29:38 khong 1.15.18.2: #
- 2004/06/02 04:54:24 khong 1.15.18.1: #i11993# fix getWordBoundary problem when position is on the end of the word.
-
-commit 606556eed208d1218f950df2200510a7e19af1d9
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Fri May 28 15:33:28 2004 +0000
-
- INTEGRATION: CWS i18n12 (1.1.2); FILE ADDED
- 2004/04/30 14:37:52 er 1.1.2.1: #i27711# Hungarian breakiterator (provided by Timar Andras)
-
-commit 9710ca90166c18c0a92f7f0246a7c2f7dae87ebc
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Fri May 28 15:33:17 2004 +0000
-
- INTEGRATION: CWS i18n12 (1.4.22); FILE MERGED
- 2004/04/13 11:55:32 er 1.4.22.1: #i27711# Hungarian breakiterator
-
-commit b138663ef4f4ade38fb42f8a2f567527cf15949b
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Fri May 28 15:33:02 2004 +0000
-
- INTEGRATION: CWS i18n12 (1.13.22); FILE MERGED
- 2004/04/30 11:25:47 er 1.13.22.2: RESYNC: (1.13-1.14); FILE MERGED
- 2004/04/13 11:55:32 er 1.13.22.1: #i27711# Hungarian breakiterator
-
-commit f5bc5f04e4de8fa502d498a99f4ef6a340d796c0
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Wed Mar 17 08:02:14 2004 +0000
-
- INTEGRATION: CWS i18n11 (1.13.14); FILE MERGED
- 2004/02/04 02:09:04 khong 1.13.14.2: #i24098# skip preceding space for beginOfSentence
- 2004/01/06 19:41:49 khong 1.13.14.1: #i24098# fix beginOfSentence, which did not work correctly when cursor is on the beginning of the sentence
-
-commit 16401a5b865b5da8a2dd70057e8b048e9b797d5a
-Author: Oliver Bolte <obo@openoffice.org>
-Date: Wed Mar 17 08:02:01 2004 +0000
-
- INTEGRATION: CWS i18n11 (1.12.14); FILE MERGED
- 2004/02/10 14:21:13 er 1.12.14.3: RESYNC: (1.12-1.13); FILE MERGED
- 2004/02/05 16:45:30 khong 1.12.14.2: #i24850# fix the problem in previousCharBlock, when target char block is in position 1
- 2004/02/04 02:13:48 khong 1.12.14.1: #i24098# check boundary condition for Sentence, Script, CharBlock breakiterator
-
-commit 4da98b648497af30de0fcf1a16e649ce18b0564f
-Author: Jens-Heiner Rechtien <hr@openoffice.org>
-Date: Mon Mar 8 16:17:05 2004 +0000
-
- INTEGRATION: CWS i18n09 (1.2.2); FILE MERGED
- 2003/12/04 23:45:37 khong 1.2.2.3: #i22602# make dot stick on beginning of a word when doing line break
- 2003/12/04 23:12:37 khong 1.2.2.2: #i21392# change line break rule to match with MS office
+Date: Wed Sep 5 16:37:28 2007 +0000
-done, regression tests added:
+ INTEGRATION: CWS i18n37 (1.22.6); FILE MERGED
+ 2007/09/03 18:27:39 khong 1.22.6.2: i8132 fixed a problem in skipping space for word breakiterator
+ 2007/08/31 21:30:30 khong 1.22.6.1: i81158 fix skipping space problem
-#112623# update Japanese word breakiterator dictionary
-#i50172# add cell breakiterator rule for Tamil
-#i80412# indic cursoring
-#i107843# em-dash/en-dash breakiterator fix for spell checking
-#i103552# Japanese word for 'shutdown' added to ja.dic
-#i113785# ligatures for spell checking will no longer break words
-An opening quote should not be counted as a word by word count tool (regression test in writer)
-fdo#31271 wrong line break with (
-#i89042# word count fix (regression test is in writer)
-#i58513# add break iterator rules for Finish
-#i19716# fix wrong line break on bracket characters
-#i21290# extend Greek script type
-#i21907# fix isBeginWord and isEndWord problem
-#i85411# Apply patch for ZWSP
-#i17155# fix line breakiterator rule to make slash and hyphen as part of word when doing line break
-#i13451# add '-' as midLetter for Catalan dictionary word breakiterator
-#i13494# fix word breakiterator rule to handle punctuations and signs correctly
-#i29548# Fix Thai word breakiterator problem
-#i11993# #i14904# fix word breakiterator issues