From: Graham Inggs Date: Sat, 7 Jul 2018 08:27:37 +0000 (+0200) Subject: New upstream version 2.1.1 X-Git-Tag: archive/raspbian/2.11.3-2+rpi1^2~10^2~11 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=07148425c1245ffb63b675c52df2e63f9ad49328;p=utf8proc.git New upstream version 2.1.1 --- diff --git a/.gitignore b/.gitignore index 41a6cff..81f237e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ test/normtest test/graphemetest test/printproperty test/charwidth +test/misc test/valid test/iterate test/case diff --git a/CMakeLists.txt b/CMakeLists.txt index be676ba..0ea3bd1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ project (utf8proc C) # Be sure to also update these in Makefile and MANIFEST! set(SO_MAJOR 2) set(SO_MINOR 1) -set(SO_PATCH 0) +set(SO_PATCH 1) add_definitions ( -DUTF8PROC_EXPORTS diff --git a/MANIFEST b/MANIFEST index b39f8a8..db9620c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2,6 +2,6 @@ include/ include/utf8proc.h lib/ lib/libutf8proc.a -lib/libutf8proc.so -> libutf8proc.so.2.1.0 -lib/libutf8proc.so.2 -> libutf8proc.so.2.1.0 -lib/libutf8proc.so.2.1.0 +lib/libutf8proc.so -> libutf8proc.so.2.1.1 +lib/libutf8proc.so.2 -> libutf8proc.so.2.1.1 +lib/libutf8proc.so.2.1.1 diff --git a/Makefile b/Makefile index 51995c3..f8e5e8b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,6 @@ # libutf8proc Makefile # programs -MAKE=make AR?=ar CC?=gcc INSTALL=install @@ -22,7 +21,7 @@ UCFLAGS = $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS # Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt! 
MAJOR=2 MINOR=1 -PATCH=0 +PATCH=1 OS := $(shell uname) ifeq ($(OS),Darwin) # MacOS X @@ -49,7 +48,7 @@ clean: ifneq ($(OS),Darwin) rm -f libutf8proc.so.$(MAJOR) endif - rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom + rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom test/misc rm -rf MANIFEST.new tmp $(MAKE) -C bench clean $(MAKE) -C data clean @@ -82,7 +81,7 @@ libutf8proc.so: libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@.$(MAJOR) libutf8proc.$(MAJOR).dylib: utf8proc.o - $(CC) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ -Wl,-compatibility_version -Wl,$(MAJOR) -Wl,-current_version -Wl,$(MAJOR).$(MINOR).$(PATCH) + $(CC) $(LDFLAGS) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ -Wl,-compatibility_version -Wl,$(MAJOR) -Wl,-current_version -Wl,$(MAJOR).$(MINOR).$(PATCH) libutf8proc.dylib: libutf8proc.$(MAJOR).dylib ln -f -s libutf8proc.$(MAJOR).dylib $@ @@ -139,11 +138,15 @@ test/case: test/case.c test/tests.o utf8proc.o utf8proc.h test/tests.h test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h $(CC) $(UCFLAGS) test/custom.c test/tests.o utf8proc.o -o $@ -check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/custom test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o +test/misc: test/misc.c test/tests.o utf8proc.o utf8proc.h test/tests.h + $(CC) $(UCFLAGS) test/misc.c test/tests.o utf8proc.o -o $@ + +check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/custom test/charwidth test/misc test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o $(MAKE) -C bench test/normtest data/NormalizationTest.txt test/graphemetest 
data/GraphemeBreakTest.txt test/charwidth + test/misc test/valid test/iterate test/case diff --git a/NEWS.md b/NEWS.md index 22eff0e..6e6d1ab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,16 @@ # utf8proc release history # +## Version 2.1.1 ## + +2018-04-27 + +- Fixed composition bug ([#128]). + +- Minor build fixes ([#94], [#99], [#113], [#125]). + ## Version 2.1 ## -2016-12-16: +2016-12-26: - New functions `utf8proc_map_custom` and `utf8proc_decompose_custom` to allow user-supplied transformations of codepoints, in conjunction @@ -297,7 +305,12 @@ Release of version 1.0.1 [#78]: https://github.com/JuliaLang/utf8proc/issues/78 [#79]: https://github.com/JuliaLang/utf8proc/issues/79 [#80]: https://github.com/JuliaLang/utf8proc/issues/80 -[#84]: https://github.com/JuliaLang/utf8proc/pull/84 -[#88]: https://github.com/JuliaLang/utf8proc/pull/88 -[#89]: https://github.com/JuliaLang/utf8proc/pull/89 +[#84]: https://github.com/JuliaLang/utf8proc/issues/84 +[#88]: https://github.com/JuliaLang/utf8proc/issues/88 +[#89]: https://github.com/JuliaLang/utf8proc/issues/89 [#90]: https://github.com/JuliaLang/utf8proc/issues/90 +[#94]: https://github.com/JuliaLang/utf8proc/issues/94 +[#99]: https://github.com/JuliaLang/utf8proc/issues/99 +[#113]: https://github.com/JuliaLang/utf8proc/issues/113 +[#125]: https://github.com/JuliaLang/utf8proc/issues/125 +[#128]: https://github.com/JuliaLang/utf8proc/issues/128 diff --git a/data/Makefile b/data/Makefile index 19d375f..f1a3f5f 100644 --- a/data/Makefile +++ b/data/Makefile @@ -38,7 +38,7 @@ CharWidths.txt: charwidths.jl unifont.sfd unifont_upper.sfd EastAsianWidth.txt UNICODE_VERSION=9.0.0 UnicodeData.txt: - $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt + $(CURL) $(CURLFLAGS) -o $@ -O http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt EastAsianWidth.txt: $(CURL) $(CURLFLAGS) -o $@ -O 
$(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/EastAsianWidth.txt diff --git a/data/data_generator.rb b/data/data_generator.rb index 97c9033..37a3780 100644 --- a/data/data_generator.rb +++ b/data/data_generator.rb @@ -340,7 +340,7 @@ for code in 0...0x110000 end end -$stdout << "const utf8proc_uint16_t utf8proc_sequences[] = {\n " +$stdout << "static const utf8proc_uint16_t utf8proc_sequences[] = {\n " i = 0 $int_array.each do |entry| i += 1 @@ -352,7 +352,7 @@ $int_array.each do |entry| end $stdout << "};\n\n" -$stdout << "const utf8proc_uint16_t utf8proc_stage1table[] = {\n " +$stdout << "static const utf8proc_uint16_t utf8proc_stage1table[] = {\n " i = 0 stage1.each do |entry| i += 1 @@ -364,7 +364,7 @@ stage1.each do |entry| end $stdout << "};\n\n" -$stdout << "const utf8proc_uint16_t utf8proc_stage2table[] = {\n " +$stdout << "static const utf8proc_uint16_t utf8proc_stage2table[] = {\n " i = 0 stage2.flatten.each do |entry| i += 1 @@ -376,7 +376,7 @@ stage2.flatten.each do |entry| end $stdout << "};\n\n" -$stdout << "const utf8proc_property_t utf8proc_properties[] = {\n" +$stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n" $stdout << " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 0, 0, UTF8PROC_BOUNDCLASS_OTHER},\n" properties.each { |line| $stdout << line @@ -385,7 +385,7 @@ $stdout << "};\n\n" -$stdout << "const utf8proc_uint16_t utf8proc_combinations[] = {\n " +$stdout << "static const utf8proc_uint16_t utf8proc_combinations[] = {\n " i = 0 comb1st_indicies.keys.each_index do |a| offset = 0 diff --git a/test/misc.c b/test/misc.c new file mode 100644 index 0000000..e26cc6f --- /dev/null +++ b/test/misc.c @@ -0,0 +1,27 @@ +/* Miscellaneous tests, e.g. 
regression tests */ + +#include "tests.h" + +static void issue128(void) /* #128 */ +{ + utf8proc_uint8_t input[] = {0x72, 0xcc, 0x87, 0xcc, 0xa3, 0x00}; /* "r\u0307\u0323" */ + utf8proc_uint8_t nfc[] = {0xe1, 0xb9, 0x9b, 0xcc, 0x87, 0x00}; /* "\u1E5B\u0307" */ + utf8proc_uint8_t nfd[] = {0x72, 0xcc, 0xa3, 0xcc, 0x87, 0x00}; /* "r\u0323\u0307" */ + utf8proc_uint8_t *nfc_out, *nfd_out; + nfc_out = utf8proc_NFC(input); + printf("NFC \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)nfc_out, (char*)nfc); + check(strlen((char*) nfc_out) == 5, "incorrect nfc length"); + check(!memcmp(nfc, nfc_out, 6), "incorrect nfc data"); + nfd_out = utf8proc_NFD(input); + printf("NFD \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)nfd_out, (char*)nfd); + check(strlen((char*) nfd_out) == 5, "incorrect nfd length"); + check(!memcmp(nfd, nfd_out, 6), "incorrect nfd data"); + free(nfd_out); free(nfc_out); +} + +int main(void) +{ + issue128(); + printf("Misc tests SUCCEEDED.\n"); + return 0; +} diff --git a/utf8proc.c b/utf8proc.c index c14bbe1..c0f84d9 100644 --- a/utf8proc.c +++ b/utf8proc.c @@ -284,9 +284,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) { static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state) { - int lbc_override = lbc; - if (state && *state != UTF8PROC_BOUNDCLASS_START) - lbc_override = *state; + int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START) + ? *state : lbc); utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc); if (state) { // Special support for GB 12/13 made possible by GB999. 
After two RI @@ -632,9 +631,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b current_property->comb_index != UINT16_MAX && current_property->comb_index >= 0x8000) { int sidx = starter_property->comb_index; - int idx = (current_property->comb_index & 0x3FFF) - utf8proc_combinations[sidx]; - if (idx >= 0 && idx <= utf8proc_combinations[sidx + 1] ) { - idx += sidx + 2; + int idx = current_property->comb_index & 0x3FFF; + if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) { + idx += sidx + 2 - utf8proc_combinations[sidx]; if (current_property->comb_index & 0x4000) { composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1]; } else diff --git a/utf8proc.h b/utf8proc.h index edf46d4..7b3e6fd 100644 --- a/utf8proc.h +++ b/utf8proc.h @@ -28,7 +28,7 @@ * utf8proc is a free/open-source (MIT/expat licensed) C library * providing Unicode normalization, case-folding, and other operations * for strings in the UTF-8 encoding, supporting Unicode version - * 8.0.0. See the utf8proc home page (http://julialang.org/utf8proc/) + * 9.0.0. See the utf8proc home page (http://julialang.org/utf8proc/) * for downloads and other information, or the source code on github * (https://github.com/JuliaLang/utf8proc). * @@ -73,11 +73,11 @@ /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */ #define UTF8PROC_VERSION_MINOR 1 /** The PATCH version (increased for fixes that do not change the API). 
*/ -#define UTF8PROC_VERSION_PATCH 0 +#define UTF8PROC_VERSION_PATCH 1 /** @} */ #include <stdlib.h> -#include <sys/types.h> + #if defined(_MSC_VER) && _MSC_VER < 1800 // MSVC prior to 2013 lacked stdbool.h and inttypes.h typedef signed char utf8proc_int8_t; diff --git a/utf8proc_data.c b/utf8proc_data.c index d8a56bb..3de2873 100644 --- a/utf8proc_data.c +++ b/utf8proc_data.c @@ -1,4 +1,4 @@ -const utf8proc_uint16_t utf8proc_sequences[] = { +static const utf8proc_uint16_t utf8proc_sequences[] = { 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, @@ -1176,7 +1176,7 @@ const utf8proc_uint16_t utf8proc_sequences[] = { 56603, 55354, 56604, 55354, 56605, 55354, 56606, 55354, 56607, 55354, 56608, 55354, 56609, }; -const utf8proc_uint16_t utf8proc_stage1table[] = { +static const utf8proc_uint16_t utf8proc_stage1table[] = { 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, @@ -1723,7 +1723,7 @@ const utf8proc_uint16_t utf8proc_stage1table[] = { 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, 38656, }; -const utf8proc_uint16_t utf8proc_stage2table[] = { +static const utf8proc_uint16_t utf8proc_stage2table[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 3, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -6590,7 +6590,7 @@ const utf8proc_uint16_t utf8proc_stage2table[] = { 3984, 3984, 3984, 3984, 3984, 3984, 3984, 0, 0, }; -const utf8proc_property_t utf8proc_properties[] = { +static const utf8proc_property_t utf8proc_properties[] = { {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 0, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_CC, 0, UTF8PROC_BIDI_CLASS_BN, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, true, false, true, 0, 0, UTF8PROC_BOUNDCLASS_CONTROL}, {UTF8PROC_CATEGORY_CC, 0, UTF8PROC_BIDI_CLASS_BN, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, 
UINT16_MAX, false, false, false, true, 0, 0, UTF8PROC_BOUNDCLASS_CONTROL}, @@ -13420,7 +13420,7 @@ const utf8proc_property_t utf8proc_properties[] = { {UTF8PROC_CATEGORY_LO, 0, UTF8PROC_BIDI_CLASS_L, 0, 7975, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, }; -const utf8proc_uint16_t utf8proc_combinations[] = { +static const utf8proc_uint16_t utf8proc_combinations[] = { 0, 46, 192, 193, 194, 195, 196, 197, 0, 256, 258, 260, 550, 461, 0, 0, 512, 514, 0, 0, 0, 0, 0, 0, 0,