} else { // word boundary break: take the boundary pLineBI reports as preceding nStartPos
lbr.breakIndex = pLineBI->preceding(nStartPos);
lbr.breakType = BreakType::WORDBOUNDARY;
+
+ // Special case for U+002F SOLIDUS ('/') in URIs and path names.
+ // UAX #14 (TR14) classifies it as SY: Symbols Allowing Break After (A),
+ // but breaking right after a slash is unwanted in paths; see also i#17155.
+ if (lbr.breakIndex > 0 && Text[lbr.breakIndex-1] == '/')
+ {
+ // Scan backward from the slash and use the nearest preceding whitespace
+ // as the break opportunity instead. This also glues short forms like "w/o".
+ // If no whitespace turns up within nOverlyLong index positions, the path is
+ // treated as overly long and the break after the slash is kept as indicated.
+ const sal_Int32 nOverlyLong = 66; // arbitrary cut-off for the backward scan
+ sal_Int32 nPos = lbr.breakIndex - 1; // start at the slash itself
+ while (nPos > 0 && lbr.breakIndex - nPos < nOverlyLong)
+ {
+ if (u_isWhitespace(Text.iterateCodePoints( &nPos, -1))) // moves nPos back one code point and tests the code point found there
+ {
+ lbr.breakIndex = nPos + 1; // break directly after the whitespace
+ break;
+ }
+ }
+ }
}
#define WJ 0x2060 // Word Joiner
case USCRIPT_SYMBOLS_EMOJI:
sRet = "mis"; // Zsye - Emoji variant
break;
+#endif
+#if (U_ICU_VERSION_MAJOR_NUM >= 60) // cases below are only compiled against ICU 60 or later
+ case USCRIPT_MASARAM_GONDI:
+ sRet = "gon-Gonm"; // "gon" is the Gondi macrolanguage code; individual languages could be wsg, esg or gno
+ break;
+ case USCRIPT_SOYOMBO:
+ sRet = "mn-Soyo"; // Soyombo: abugida used to write Mongolian, also Tibetan and Sanskrit
+ break;
+ case USCRIPT_ZANABAZAR_SQUARE:
+ sRet = "mn-Zanb"; // Zanabazar Square: abugida used to write Mongolian
+ break;
#endif // U_ICU_VERSION_MAJOR_NUM >= 60
}
return sRet;
#define RID_SUBSETSTR_OSAGE (RID_SUBSET_START + 274)
#define RID_SUBSETSTR_TANGUT (RID_SUBSET_START + 275)
#define RID_SUBSETSTR_TANGUT_COMPONENTS (RID_SUBSET_START + 276)
+#define RID_SUBSETSTR_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F (RID_SUBSET_START + 277)
+#define RID_SUBSETSTR_KANA_EXTENDED_A (RID_SUBSET_START + 278)
+#define RID_SUBSETSTR_MASARAM_GONDI (RID_SUBSET_START + 279)
+#define RID_SUBSETSTR_NUSHU (RID_SUBSET_START + 280)
+#define RID_SUBSETSTR_SOYOMBO (RID_SUBSET_START + 281)
+#define RID_SUBSETSTR_SYRIAC_SUPPLEMENT (RID_SUBSET_START + 282)
+#define RID_SUBSETSTR_ZANABAZAR_SQUARE (RID_SUBSET_START + 283)
// RID_SUBSET_END (RID_SUBSET_START + 299)
aAllSubsets.push_back( Subset( 0x18800, 0x18AFF, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_TANGUT_COMPONENTS)) ) );
break;
#endif
+#if (U_ICU_VERSION_MAJOR_NUM >= 60) // block constants below are only compiled against ICU 60 or later
+ // Each Subset spans the complete Unicode block (first..last code point of the
+ // block), not merely the code points that are currently assigned in it.
+ case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F:
+ // The block is U+2CEB0..U+2EBEF; U+2EBE0 is only its last assigned ideograph.
+ aAllSubsets.push_back( Subset( 0x2CEB0, 0x2EBEF, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F)) ) );
+ break;
+ case UBLOCK_KANA_EXTENDED_A:
+ aAllSubsets.push_back( Subset( 0x1B100, 0x1B12F, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_KANA_EXTENDED_A)) ) );
+ break;
+ case UBLOCK_MASARAM_GONDI:
+ aAllSubsets.push_back( Subset( 0x11D00, 0x11D5F, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_MASARAM_GONDI)) ) );
+ break;
+ case UBLOCK_NUSHU:
+ aAllSubsets.push_back( Subset( 0x1B170, 0x1B2FF, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_NUSHU)) ) );
+ break;
+ case UBLOCK_SOYOMBO:
+ aAllSubsets.push_back( Subset( 0x11A50, 0x11AAF, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_SOYOMBO)) ) );
+ break;
+ case UBLOCK_SYRIAC_SUPPLEMENT:
+ aAllSubsets.push_back( Subset( 0x0860, 0x086F, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_SYRIAC_SUPPLEMENT)) ) );
+ break;
+ case UBLOCK_ZANABAZAR_SQUARE:
+ aAllSubsets.push_back( Subset( 0x11A00, 0x11A4F, aStringList.GetString(aStringList.FindIndex(RID_SUBSETSTR_ZANABAZAR_SQUARE)) ) );
+ break;
+#endif
}
< "Osage"; RID_SUBSETSTR_OSAGE ; > ;
< "Tangut"; RID_SUBSETSTR_TANGUT ; > ;
< "Tangut Components"; RID_SUBSETSTR_TANGUT_COMPONENTS ; > ;
+ < "CJK Unified Ideographs Extension F"; RID_SUBSETSTR_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F ; > ;
+ < "Kana Extended-A"; RID_SUBSETSTR_KANA_EXTENDED_A ; > ;
+ < "Masaram Gondi"; RID_SUBSETSTR_MASARAM_GONDI ; > ;
+ < "Nushu"; RID_SUBSETSTR_NUSHU ; > ;
+ < "Soyombo"; RID_SUBSETSTR_SOYOMBO ; > ;
+ < "Syriac Supplement"; RID_SUBSETSTR_SYRIAC_SUPPLEMENT ; > ;
+ < "Zanabazar Square"; RID_SUBSETSTR_ZANABAZAR_SQUARE ; > ;
};
};