From 7a648b4099bd8ca66ad9d43c6332e5ac3c1e5748 Mon Sep 17 00:00:00 2001
From: Michael Gilbert
Date: Tue, 13 Sep 2022 01:46:21 +0100
Subject: [PATCH] use system unicode

Make tools/make_unicode read its input from local files instead of
fetching it with wget:

 * the Unicode version is taken from the changelog of the installed
   unicode-data package, and the script stops immediately when that
   version cannot be determined;
 * UnicodeData and friends are read from /usr/share/unicode, everything
   else from copies kept under debian/unicode;
 * open_data_file() opens those paths directly (no cache, no download).

Also add the generators that write libs/wine/casemap.c, collation.c,
wctype.c, compose.c and decompose.c, and call them from the main routine.

Gbp-Pq: Topic generate
Gbp-Pq: Name unicode.patch
---
 tools/make_unicode | 336 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 311 insertions(+), 25 deletions(-)

diff --git a/tools/make_unicode b/tools/make_unicode
index 2bfe6f9..24eb3b6 100755
--- a/tools/make_unicode
+++ b/tools/make_unicode
@@ -21,18 +21,20 @@
 use strict;
 
-# base URLs for www.unicode.org files
-my $UNIVERSION = "14.0.0";
-my $UNIDATA = "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip";
-my $IDNADATA = "https://www.unicode.org/Public/idna/$UNIVERSION";
-my $JISDATA = "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS";
-my $KSCDATA = "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC";
-my $REPORTS = "http://www.unicode.org/reports";
-my $MSDATA = "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498";
+# Locations of unicode files
+my $UNIVERSION = `dpkg-parsechangelog --file /usr/share/doc/unicode-data/changelog.Debian.gz --show-field Version | sed 's/-.*//'`;
+chomp($UNIVERSION);
+die "cannot determine the unicode-data version\n" unless $UNIVERSION; # the pipeline's status is sed's, so check the output
+my $UNIDATA = "/usr/share/unicode";
+my $IDNADATA = "debian/unicode/tr46/$UNIVERSION";
+my $JISDATA = "debian/unicode/mappings/OBSOLETE/EASTASIA/JIS";
+my $KSCDATA = "debian/unicode/mappings/OBSOLETE/EASTASIA/KSC";
+my $REPORTS = "debian/unicode";
+my $MSDATA = "debian/unicode";
 my $MSCODEPAGES = "$MSDATA/Windows Supported Code Page Data Files.zip";
 
 # Sort keys file
-my $SORTKEYS = "tr10/allkeys.txt";
+my $SORTKEYS = "tr10/3.1.0/allkeys.txt";
 
 # Default char for undefined mappings
 my $DEF_CHAR = ord '?';
 
@@ -440,6 +442,7 @@ my @category_table = ();
 my @initial_joining_table = ();
 my @direction_table = ();
 my @decomp_table = ();
+my @compose_table = ();
 my @combining_class_table = ();
 my @decomp_compat_table = ();
 my @comp_exclusions = ();
@@ -487,25 +490,11 @@ sub open_data_file($$)
     if ($base =~ /.*\/([^\/]+)\.zip$/)
     {
-        my $zip = "$1$suffix.zip";
-        unless (-f "$cache/$zip")
-        {
-            system "mkdir", "-p", $cache;
-            print "Fetching $base...\n";
-            !system "wget", "-q", "-O", "$cache/$zip", $base or die "cannot fetch $base";
-        }
-        open FILE, "-|", "unzip", "-p", "$cache/$zip", $name or die "cannot extract $name from $zip";
+        open FILE, "-|", "unzip", "-p", $base, $name or die "cannot extract $name from $base";
     }
     else
     {
-        (my $dest = "$cache/$name") =~ s/(.*)(\.[^\/.]+)$/$1$suffix$2/;
-        unless (-f $dest)
-        {
-            system "mkdir", "-p", $dir;
-            print "Fetching $base/$name...\n";
-            !system "wget", "-q", "-O", $dest, "$base/$name" or die "cannot fetch $base/$name";
-        }
-        open FILE, "<$dest" or die "cannot open $dest";
+        open FILE, "<", "$base/$name" or die "cannot open $base/$name: $!";
     }
     return *FILE;
 }
 
@@ -724,6 +713,7 @@ sub load_data()
         if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
         {
             $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
+            push @compose_table, [ hex $1, hex $2, $src ];
         }
         elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
         {
@@ -1986,6 +1976,297 @@ sub dump_bidi_dir_table($)
     save_file($filename);
 }
 
+################################################################
+# dump the char composition table
+sub dump_compose_table($)
+{
+    my $filename = shift;
+
+    open OUTPUT,">$filename.new" or die "Cannot create $filename";
+    print "Building $filename\n";
+    print OUTPUT "/* Unicode char composition */\n";
+    print OUTPUT "/* generated from $UNIDATA:UnicodeData.txt */\n";
+    print OUTPUT "/* DO NOT EDIT!! */\n\n";
+    print OUTPUT "#include \"wine/asm.h\"\n\n";
+    print OUTPUT "#ifdef __ASM_OBSOLETE\n\n";
+    print OUTPUT "#include \"windef.h\"\n\n";
+
+    my @filled = ();
+    foreach my $i (@compose_table)
+    {
+        my @comp = @$i;
+        push @{$filled[$comp[1]]}, [ $comp[0], $comp[2] ];
+    }
+
+    # count how many different second chars we have
+
+    my $count = 0;
+    for (my $i = 0; $i < 65536; $i++)
+    {
+        next unless defined $filled[$i];
+        $count++;
+    }
+
+    # build the table of second chars and offsets
+
+    my $pos = $count + 1;
+    my @table = ();
+    for (my $i = 0; $i < 65536; $i++)
+    {
+        next unless defined $filled[$i];
+        push @table, $i, $pos;
+        $pos += @{$filled[$i]};
+    }
+    # terminator with last position
+    push @table, 0, $pos;
+    printf OUTPUT "static const WCHAR table[0x%x] =\n{\n", 2*$pos;
+    printf OUTPUT " /* second chars + offsets */\n%s", dump_array( 16, 0, @table );
+
+    # build the table of first chars and mappings
+
+    for (my $i = 0; $i < 65536; $i++)
+    {
+        next unless defined $filled[$i];
+        my @table = ();
+        my @list = sort { $a->[0] <=> $b->[0] } @{$filled[$i]};
+        for (my $j = 0; $j <= $#list; $j++)
+        {
+            push @table, $list[$j][0], $list[$j][1];
+        }
+        printf OUTPUT ",\n /* 0x%04x */\n%s", $i, dump_array( 16, 0, @table );
+    }
+    print OUTPUT "\n};\n\n";
+    print OUTPUT <<"EOF";
+static inline int binary_search( WCHAR ch, int low, int high )
+{
+    while (low <= high)
+    {
+        int pos = (low + high) / 2;
+        if (table[2 * pos] < ch) low = pos + 1;
+        else if (table[2 * pos] > ch) high = pos - 1;
+        else return pos;
+    }
+    return -1;
+}
+
+WCHAR DECLSPEC_HIDDEN wine_compose( const WCHAR *str )
+{
+    int pos, idx = 1, start = 0, count = $count;
+    for (;;)
+    {
+        if ((pos = binary_search( str[idx], start, count - 1 )) == -1) return 0;
+        if (!idx--) return table[2 * pos + 1];
+        start = table[2 * pos + 1];
+        count = table[2 * pos + 3];
+    }
+}
+
+#endif /* __ASM_OBSOLETE */
+EOF
+    close OUTPUT;
+    save_file($filename);
+}
+
+################################################################
+# dump a decomposition table
+sub dump_decompositions($@)
+{
+    my ($name, @decomp) = @_;
+
+    # first determine all the 16-char subsets that contain something
+
+    my @filled = (0) x 4096;
+    my $pos = 16; # for the null subset
+    my $data_total = 0;
+    for (my $i = 0; $i < 65536; $i++)
+    {
+        next unless defined $decomp[$i];
+        if ($filled[$i >> 4] == 0)
+        {
+            $filled[$i >> 4] = $pos;
+            $pos += 16;
+        }
+        $data_total += @{$decomp[$i]};
+    }
+    my $total = $pos;
+
+    # now count the 256-char subsets that contain something
+
+    my @filled_idx = (256) x 256;
+    $pos = 256 + 16;
+    for (my $i = 0; $i < 4096; $i++)
+    {
+        next unless $filled[$i];
+        $filled_idx[$i >> 4] = $pos;
+        $pos += 16;
+        $i |= 15;
+    }
+    my $null_offset = $pos; # null mapping
+    $total += $pos + 1; # add the offset sentinel
+
+    # add the index offsets to the subsets positions
+
+    for (my $i = 0; $i < 4096; $i++)
+    {
+        next unless $filled[$i];
+        $filled[$i] += $null_offset;
+    }
+
+    # dump the main index
+
+    printf OUTPUT "\nconst WCHAR DECLSPEC_HIDDEN %s[%d] =\n", $name, $total + $data_total;
+    printf OUTPUT "{\n /* index */\n";
+    printf OUTPUT "%s", dump_array( 16, 0, @filled_idx );
+    printf OUTPUT ",\n /* null sub-index */\n%s", dump_array( 16, 0, ($null_offset) x 16 );
+
+    # dump the second-level indexes
+
+    for (my $i = 0; $i < 256; $i++)
+    {
+        next unless ($filled_idx[$i] > 256);
+        my @table = @filled[($i<<4)..($i<<4)+15];
+        for (my $j = 0; $j < 16; $j++) { $table[$j] ||= $null_offset; }
+        printf OUTPUT ",\n /* sub-index %02x */\n", $i;
+        printf OUTPUT "%s", dump_array( 16, 0, @table );
+    }
+
+    # dump the 16-char offsets
+
+    printf OUTPUT ",\n /* null offsets */\n";
+    printf OUTPUT "%s", dump_array( 16, 0, ($total) x (16) );
+
+    $pos = $total;
+
+    my @data;
+    for (my $i = 0; $i < 4096; $i++)
+    {
+        next unless $filled[$i];
+        my @table = (0) x (16);
+        for (my $j = 0; $j < 16; $j++)
+        {
+            $table[$j] = $pos;
+            if (defined $decomp[($i<<4) + $j])
+            {
+                $pos += $#{$decomp[($i<<4) + $j]} + 1;
+                push @data, @{$decomp[($i<<4) + $j]};
+            }
+        }
+        printf OUTPUT ",\n /* offsets 0x%03x0 .. 0x%03xf */\n", $i, $i;
+        printf OUTPUT "%s", dump_array( 16, 0, @table );
+    }
+
+    my @sentinel = $pos;
+    printf OUTPUT ",\n /* offset sentinel */\n";
+    printf OUTPUT "%s", dump_array( 16, 0, @sentinel );
+
+    printf OUTPUT ",\n /* data */\n";
+    printf OUTPUT "%s", dump_array( 16, 0, @data );
+
+    printf OUTPUT "\n};\n";
+}
+
+################################################################
+# dump the char decomposition table
+sub dump_decompose_table($$)
+{
+    my ($filename, $compat) = @_;
+
+    open OUTPUT,">$filename.new" or die "Cannot create $filename";
+    print "Building $filename\n";
+    print OUTPUT "/* Unicode char decomposition */\n";
+    print OUTPUT "/* generated from $UNIDATA:UnicodeData.txt */\n";
+    print OUTPUT "/* DO NOT EDIT!! */\n\n";
+    print OUTPUT "#include \"wine/asm.h\"\n\n";
+    print OUTPUT "#ifdef __ASM_OBSOLETE\n\n";
+    print OUTPUT "#include \"windef.h\"\n";
+
+    dump_decompositions( "nfd_table", build_decompositions( @decomp_table ));
+    dump_decompositions( "nfkd_table", build_decompositions( @decomp_compat_table )) if $compat;
+
+    print OUTPUT "\n#endif /* __ASM_OBSOLETE */\n";
+    close OUTPUT;
+    save_file($filename);
+}
+
+################################################################
+# dump a case mapping table
+sub dump_case_table($@)
+{
+    my ($name,@table) = @_;
+
+    for (my $i = 0; $i < 65536; $i++)
+    {
+        next unless defined $table[$i];
+        $table[$i] = ($table[$i] - $i) & 0xffff;
+    }
+
+    my @array = compress_array( 256, 0, @table[0..65535] );
+
+    printf OUTPUT "const WCHAR %s[%d] =\n", $name, scalar @array;
+    printf OUTPUT "{\n /* index */\n";
+    printf OUTPUT "%s,\n", dump_array( 16, 0, @array[0..255] );
+    printf OUTPUT " /* data */\n";
+    printf OUTPUT "%s", dump_array( 16, 0, @array[256..$#array] );
+    printf OUTPUT "\n};\n";
+}
+
+################################################################
+# dump the case mapping tables
+sub dump_case_mappings($)
+{
+    my $filename = shift;
+    open OUTPUT,">$filename.new" or die "Cannot create $filename";
+    print "Building $filename\n";
+    print OUTPUT "/* Unicode case mappings */\n";
+    print OUTPUT "/* generated from $UNIDATA:UnicodeData.txt */\n";
+    print OUTPUT "/* DO NOT EDIT!! */\n\n";
+    print OUTPUT "#include \"wine/asm.h\"\n\n";
+    print OUTPUT "#ifdef __ASM_OBSOLETE\n\n";
+    print OUTPUT "#include \"windef.h\"\n\n";
+
+    my @upper = @toupper_table;
+    my @lower = @tolower_table;
+    remove_linguistic_mappings( \@upper, \@lower );
+
+    dump_case_table( "wine_casemap_lower", @lower );
+    print OUTPUT "\n";
+    dump_case_table( "wine_casemap_upper", @upper );
+    print OUTPUT "\n#endif /* __ASM_OBSOLETE */\n";
+    close OUTPUT;
+    save_file($filename);
+}
+
+################################################################
+# dump the ctype tables
+sub dump_ctype_tables($)
+{
+    my $filename = shift;
+    open OUTPUT,">$filename.new" or die "Cannot create $filename";
+    print "Building $filename\n";
+    printf OUTPUT "/* Unicode ctype tables */\n";
+    printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
+    printf OUTPUT "#include \"wine/asm.h\"\n\n";
+    printf OUTPUT "#ifdef __ASM_OBSOLETE\n\n";
+    printf OUTPUT "#include \"windef.h\"\n\n";
+
+    my @table = map { ($_ || 0) & 0xffff; } @category_table;
+
+    # add the direction in the high 4 bits of the category
+    for (my $i = 0; $i < 65536; $i++)
+    {
+        $table[$i] |= $directions{$direction_table[$i]} << 12 if defined $direction_table[$i];
+    }
+
+    my @array = compress_array( 256, 0, @table[0..65535] );
+
+    printf OUTPUT "const unsigned short %s[%d] =\n{\n", "wine_wctype_table", $#array+1;
+    printf OUTPUT " /* offsets */\n%s,\n", dump_array( 16, 0, @array[0..255] );
+    printf OUTPUT " /* values */\n%s\n};\n", dump_array( 16, 0, @array[256..$#array] );
+    printf OUTPUT "\n#endif /* __ASM_OBSOLETE */\n";
+
+    close OUTPUT;
+    save_file($filename);
+}
 
 sub rol($$)
 {
@@ -2819,7 +3100,12 @@ sub save_file($)
 
 chdir ".." if -f "./make_unicode";
 load_data();
+dump_case_mappings( "libs/wine/casemap.c" );
+dump_sortkeys( "libs/wine/collation.c" );
 dump_sortkeys( "dlls/kernelbase/collation.c" );
+dump_ctype_tables( "libs/wine/wctype.c" );
+dump_compose_table( "libs/wine/compose.c" );
+dump_decompose_table( "libs/wine/decompose.c", 0 );
 dump_bidi_dir_table( "dlls/gdi32/uniscribe/direction.c" );
 dump_bidi_dir_table( "dlls/dwrite/direction.c" );
 dump_digit_folding( "dlls/kernelbase/digitmap.c" );
-- 
2.30.2