From 690c8817ceee6e39e6dc5843700c0f6d001341c9 Mon Sep 17 00:00:00 2001 From: Matthew Vernon Date: Thu, 23 Jan 2025 09:44:53 +0000 Subject: [PATCH] New upstream version 10.45-rc1 --- 132html | 317 - AUTHORS | 36 - AUTHORS.md | 200 + BUILD.bazel | 172 + CMakeLists.txt | 1822 ++-- ChangeLog | 188 + CheckMan | 78 - CleanTxt | 113 - Detrail | 35 - HACKING | 199 +- LICENCE => LICENCE.md | 75 +- MODULE.bazel | 9 + Makefile.am | 105 +- Makefile.in | 219 +- NEWS | 90 +- NON-AUTOTOOLS-BUILD | 96 +- PrepareRelease | 257 - README | 158 +- RunGrepTest | 140 +- RunGrepTest.bat | 476 +- RunTest | 161 +- RunTest.bat | 100 +- SECURITY.md | 44 + WORKSPACE.bazel | 1 + aclocal.m4 | 4 +- build.zig | 173 + cmake/COPYING-CMAKE-SCRIPTS | 2 +- cmake/FindEditline.cmake | 17 +- cmake/FindPackageHandleStandardArgs.cmake | 58 - cmake/FindReadline.cmake | 38 +- cmake/pcre2-config-version.cmake.in | 3 +- cmake/pcre2-config.cmake.in | 116 +- config-cmake.h.in | 12 +- config.guess | 16 +- config.sub | 20 +- configure | 2811 +++--- configure.ac | 80 +- .../allocator_src/sljitExecAllocatorApple.c | 0 .../allocator_src/sljitExecAllocatorCore.c | 0 .../allocator_src/sljitExecAllocatorFreeBSD.c | 0 .../allocator_src/sljitExecAllocatorPosix.c | 0 .../allocator_src/sljitExecAllocatorWindows.c | 0 .../sljitProtExecAllocatorNetBSD.c | 0 .../sljitProtExecAllocatorPosix.c | 0 .../allocator_src/sljitWXExecAllocatorPosix.c | 0 .../sljitWXExecAllocatorWindows.c | 0 .../sljit/sljit_src}/sljitConfig.h | 22 +- .../sljit/sljit_src}/sljitConfigCPU.h | 2 +- .../sljit/sljit_src}/sljitConfigInternal.h | 105 +- .../sljit => deps/sljit/sljit_src}/sljitLir.c | 871 +- .../sljit => deps/sljit/sljit_src}/sljitLir.h | 320 +- .../sljit/sljit_src}/sljitNativeARM_32.c | 334 +- .../sljit/sljit_src}/sljitNativeARM_64.c | 306 +- .../sljit/sljit_src}/sljitNativeARM_T2_32.c | 310 +- .../sljit_src}/sljitNativeLOONGARCH_64.c | 319 +- .../sljit/sljit_src}/sljitNativeMIPS_32.c | 0 .../sljit/sljit_src}/sljitNativeMIPS_64.c | 0 
.../sljit/sljit_src}/sljitNativeMIPS_common.c | 130 +- .../sljit/sljit_src}/sljitNativePPC_32.c | 0 .../sljit/sljit_src}/sljitNativePPC_64.c | 0 .../sljit/sljit_src}/sljitNativePPC_common.c | 132 +- .../sljit/sljit_src}/sljitNativeRISCV_32.c | 0 .../sljit/sljit_src}/sljitNativeRISCV_64.c | 0 .../sljit_src}/sljitNativeRISCV_common.c | 938 +- .../sljit/sljit_src}/sljitNativeS390X.c | 212 +- .../sljit/sljit_src}/sljitNativeX86_32.c | 20 +- .../sljit/sljit_src}/sljitNativeX86_64.c | 33 +- .../sljit/sljit_src}/sljitNativeX86_common.c | 637 +- .../sljit/sljit_src}/sljitSerialize.c | 0 .../sljit/sljit_src}/sljitUtils.c | 0 doc/html/NON-AUTOTOOLS-BUILD.txt | 96 +- doc/html/README.txt | 158 +- doc/html/index.html | 9 + doc/html/pcre2.html | 22 +- doc/html/pcre2_compile.html | 1 + doc/html/pcre2_jit_compile.html | 27 +- doc/html/pcre2_set_compile_extra_options.html | 4 + ...pcre2_set_max_pattern_compiled_length.html | 6 +- doc/html/pcre2_set_optimize.html | 57 + doc/html/pcre2_set_substitute_callout.html | 2 +- .../pcre2_set_substitute_case_callout.html | 45 + doc/html/pcre2api.html | 512 +- doc/html/pcre2build.html | 2 +- doc/html/pcre2compat.html | 43 +- doc/html/pcre2convert.html | 2 +- doc/html/pcre2grep.html | 36 +- doc/html/pcre2jit.html | 17 +- doc/html/pcre2limits.html | 2 +- doc/html/pcre2matching.html | 59 +- doc/html/pcre2partial.html | 2 +- doc/html/pcre2pattern.html | 711 +- doc/html/pcre2perform.html | 2 +- doc/html/pcre2posix.html | 4 +- doc/html/pcre2sample.html | 2 +- doc/html/pcre2serialize.html | 2 +- doc/html/pcre2syntax.html | 263 +- doc/html/pcre2test.html | 136 +- doc/html/pcre2unicode.html | 40 +- doc/index.html.src | 318 - doc/pcre2-config.1 | 2 +- doc/pcre2-config.txt | 3 +- doc/pcre2.3 | 23 +- doc/pcre2.txt | 3632 ++++--- doc/pcre2_callout_enumerate.3 | 2 +- doc/pcre2_code_copy.3 | 2 +- doc/pcre2_code_copy_with_tables.3 | 2 +- doc/pcre2_code_free.3 | 2 +- doc/pcre2_compile.3 | 3 +- doc/pcre2_compile_context_copy.3 | 2 +- 
doc/pcre2_compile_context_create.3 | 2 +- doc/pcre2_compile_context_free.3 | 2 +- doc/pcre2_config.3 | 2 +- doc/pcre2_convert_context_copy.3 | 2 +- doc/pcre2_convert_context_create.3 | 2 +- doc/pcre2_convert_context_free.3 | 2 +- doc/pcre2_converted_pattern_free.3 | 2 +- doc/pcre2_dfa_match.3 | 2 +- doc/pcre2_general_context_copy.3 | 2 +- doc/pcre2_general_context_create.3 | 2 +- doc/pcre2_general_context_free.3 | 2 +- doc/pcre2_get_error_message.3 | 2 +- doc/pcre2_get_mark.3 | 2 +- doc/pcre2_get_match_data_heapframes_size.3 | 2 +- doc/pcre2_get_match_data_size.3 | 2 +- doc/pcre2_get_ovector_count.3 | 2 +- doc/pcre2_get_ovector_pointer.3 | 2 +- doc/pcre2_get_startchar.3 | 2 +- doc/pcre2_jit_compile.3 | 28 +- doc/pcre2_jit_free_unused_memory.3 | 2 +- doc/pcre2_jit_match.3 | 2 +- doc/pcre2_jit_stack_assign.3 | 2 +- doc/pcre2_jit_stack_create.3 | 2 +- doc/pcre2_jit_stack_free.3 | 2 +- doc/pcre2_maketables.3 | 2 +- doc/pcre2_maketables_free.3 | 2 +- doc/pcre2_match.3 | 2 +- doc/pcre2_match_context_copy.3 | 2 +- doc/pcre2_match_context_create.3 | 2 +- doc/pcre2_match_context_free.3 | 2 +- doc/pcre2_match_data_create.3 | 2 +- doc/pcre2_match_data_create_from_pattern.3 | 2 +- doc/pcre2_match_data_free.3 | 2 +- doc/pcre2_pattern_convert.3 | 2 +- doc/pcre2_pattern_info.3 | 2 +- doc/pcre2_serialize_decode.3 | 2 +- doc/pcre2_serialize_encode.3 | 2 +- doc/pcre2_serialize_free.3 | 2 +- doc/pcre2_serialize_get_number_of_codes.3 | 2 +- doc/pcre2_set_bsr.3 | 2 +- doc/pcre2_set_callout.3 | 2 +- doc/pcre2_set_character_tables.3 | 2 +- doc/pcre2_set_compile_extra_options.3 | 6 +- doc/pcre2_set_compile_recursion_guard.3 | 2 +- doc/pcre2_set_depth_limit.3 | 2 +- doc/pcre2_set_glob_escape.3 | 2 +- doc/pcre2_set_glob_separator.3 | 2 +- doc/pcre2_set_heap_limit.3 | 2 +- doc/pcre2_set_match_limit.3 | 2 +- doc/pcre2_set_max_pattern_compiled_length.3 | 8 +- doc/pcre2_set_max_pattern_length.3 | 2 +- doc/pcre2_set_max_varlookbehind.3 | 2 +- doc/pcre2_set_newline.3 | 2 +- 
doc/pcre2_set_offset_limit.3 | 2 +- doc/pcre2_set_optimize.3 | 42 + doc/pcre2_set_parens_nest_limit.3 | 2 +- doc/pcre2_set_recursion_limit.3 | 2 +- doc/pcre2_set_recursion_memory_management.3 | 2 +- doc/pcre2_set_substitute_callout.3 | 4 +- doc/pcre2_set_substitute_case_callout.3 | 33 + doc/pcre2_substitute.3 | 2 +- doc/pcre2_substring_copy_byname.3 | 2 +- doc/pcre2_substring_copy_bynumber.3 | 2 +- doc/pcre2_substring_free.3 | 2 +- doc/pcre2_substring_get_byname.3 | 2 +- doc/pcre2_substring_get_bynumber.3 | 2 +- doc/pcre2_substring_length_byname.3 | 2 +- doc/pcre2_substring_length_bynumber.3 | 2 +- doc/pcre2_substring_list_free.3 | 2 +- doc/pcre2_substring_list_get.3 | 2 +- doc/pcre2_substring_nametable_scan.3 | 2 +- doc/pcre2_substring_number_from_name.3 | 2 +- doc/pcre2api.3 | 495 +- doc/pcre2build.3 | 4 +- doc/pcre2callout.3 | 2 +- doc/pcre2compat.3 | 42 +- doc/pcre2convert.3 | 4 +- doc/pcre2demo.3 | 2 +- doc/pcre2grep.1 | 37 +- doc/pcre2grep.txt | 566 +- doc/pcre2jit.3 | 18 +- doc/pcre2limits.3 | 4 +- doc/pcre2matching.3 | 54 +- doc/pcre2partial.3 | 6 +- doc/pcre2pattern.3 | 637 +- doc/pcre2perform.3 | 4 +- doc/pcre2posix.3 | 6 +- doc/pcre2sample.3 | 4 +- doc/pcre2serialize.3 | 4 +- doc/pcre2syntax.3 | 180 +- doc/pcre2test.1 | 146 +- doc/pcre2test.txt | 916 +- doc/pcre2unicode.3 | 39 +- ltmain.sh | 422 +- m4/libtool.m4 | 264 +- m4/ltoptions.m4 | 6 +- m4/ltsugar.m4 | 2 +- m4/ltversion.m4 | 12 +- m4/lt~obsolete.m4 | 2 +- m4/pcre2_visibility.m4 | 91 +- perltest.sh | 175 +- src/config.h.generic | 52 +- src/config.h.in | 46 +- src/pcre2.h.generic | 71 +- src/pcre2.h.in | 65 +- src/pcre2_auto_possess.c | 143 +- src/pcre2_chkdint.c | 6 +- src/pcre2_compile.c | 3282 +++---- src/pcre2_compile.h | 280 + src/pcre2_compile_class.c | 2737 ++++++ src/pcre2_config.c | 4 +- src/pcre2_context.c | 56 +- src/pcre2_convert.c | 18 +- src/pcre2_dfa_match.c | 107 +- src/pcre2_error.c | 30 +- src/pcre2_extuni.c | 2 +- src/pcre2_find_bracket.c | 15 +- src/pcre2_fuzzsupport.c | 38 +- 
src/pcre2_internal.h | 365 +- src/pcre2_intmodedep.h | 63 +- src/pcre2_jit_char_inc.h | 2280 +++++ src/pcre2_jit_compile.c | 3991 +++----- src/pcre2_jit_match.c | 2 +- src/pcre2_jit_neon_inc.h | 8 +- src/pcre2_jit_simd_inc.h | 113 +- src/pcre2_jit_test.c | 13 + src/pcre2_maketables.c | 10 +- src/pcre2_match.c | 617 +- src/pcre2_match_data.c | 8 +- src/pcre2_ord2utf.c | 2 +- src/pcre2_pattern_info.c | 14 +- src/pcre2_printint.c | 519 +- src/pcre2_serialize.c | 36 +- src/pcre2_study.c | 240 +- src/pcre2_substitute.c | 1026 +- src/pcre2_substring.c | 4 +- src/pcre2_ucd.c | 8392 +++++++++-------- src/pcre2_ucp.h | 62 +- src/pcre2_ucptables.c | 777 +- src/pcre2_util.h | 132 + src/pcre2_xclass.c | 395 +- src/pcre2grep.c | 229 +- src/pcre2posix.c | 14 +- src/pcre2test.c | 743 +- testdata/grepinput | 19 + testdata/grepinputBad8 | 1 + testdata/grepinputBad8_Trail | 1 + testdata/grepinputC.bz2 | Bin 313 -> 315 bytes testdata/grepinputC.gz | Bin 311 -> 313 bytes testdata/grepinputUN | 2 + testdata/grepinputv | 1 + testdata/grepinputx | 2 +- testdata/grepnot.bz2 | 2 +- testdata/grepoutput | 103 +- testdata/grepoutputC | 12 + testdata/grepoutputCN | 12 + testdata/grepoutputCNU | 4 + testdata/grepoutputCU | 4 + testdata/grepoutputN | 21 +- testdata/grepoutputUN | 5 +- testdata/testinput1 | 439 +- testdata/testinput10 | 72 +- testdata/testinput11 | 130 + testdata/testinput12 | 119 +- testdata/testinput2 | 1667 +++- testdata/testinput21 | 2 + testdata/testinput23 | 2 + testdata/testinput26 | 1530 +-- testdata/testinput27 | 3251 +++++++ testdata/testinput3 | 14 +- testdata/testinput4 | 245 + testdata/testinput5 | 1135 ++- testdata/testinput6 | 160 +- testdata/testinput7 | 277 +- testdata/testinput9 | 28 +- testdata/testoutput1 | 645 +- testdata/testoutput10 | 349 +- testdata/testoutput11-16 | 325 +- testdata/testoutput11-32 | 478 +- testdata/testoutput12-16 | 516 +- testdata/testoutput12-32 | 510 +- testdata/testoutput14-16 | 2 +- testdata/testoutput15 | 30 +- testdata/testoutput17 
| 18 +- testdata/testoutput2 | 4648 ++++++++- testdata/testoutput20 | 2 +- testdata/testoutput21 | 3 + testdata/testoutput23 | 3 + testdata/testoutput26 | 1530 +-- testdata/testoutput27 | 4153 ++++++++ testdata/testoutput3 | 39 +- testdata/testoutput3A | 39 +- testdata/testoutput3B | 39 +- testdata/testoutput4 | 386 + testdata/testoutput5 | 2915 +++++- testdata/testoutput6 | 260 +- testdata/testoutput7 | 428 +- testdata/testoutput8-16-2 | 207 +- testdata/testoutput8-16-3 | 207 +- testdata/testoutput8-16-4 | 207 +- testdata/testoutput8-32-2 | 207 +- testdata/testoutput8-32-3 | 207 +- testdata/testoutput8-32-4 | 207 +- testdata/testoutput8-8-2 | 207 +- testdata/testoutput8-8-3 | 207 +- testdata/testoutput8-8-4 | 207 +- testdata/testoutput9 | 68 +- testdata/wintestinput3 | 37 +- testdata/wintestoutput3 | 77 +- vms/configure.com | 8 + 319 files changed, 56386 insertions(+), 23034 deletions(-) delete mode 100755 132html delete mode 100644 AUTHORS create mode 100644 AUTHORS.md create mode 100644 BUILD.bazel delete mode 100755 CheckMan delete mode 100755 CleanTxt delete mode 100755 Detrail rename LICENCE => LICENCE.md (55%) create mode 100644 MODULE.bazel delete mode 100755 PrepareRelease create mode 100644 SECURITY.md create mode 100644 WORKSPACE.bazel create mode 100644 build.zig delete mode 100644 cmake/FindPackageHandleStandardArgs.cmake rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitExecAllocatorApple.c (100%) rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitExecAllocatorCore.c (100%) rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitExecAllocatorFreeBSD.c (100%) rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitExecAllocatorPosix.c (100%) rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitExecAllocatorWindows.c (100%) rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitProtExecAllocatorNetBSD.c (100%) rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitProtExecAllocatorPosix.c (100%) 
rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitWXExecAllocatorPosix.c (100%) rename {src/sljit => deps/sljit/sljit_src}/allocator_src/sljitWXExecAllocatorWindows.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitConfig.h (93%) rename {src/sljit => deps/sljit/sljit_src}/sljitConfigCPU.h (98%) rename {src/sljit => deps/sljit/sljit_src}/sljitConfigInternal.h (89%) rename {src/sljit => deps/sljit/sljit_src}/sljitLir.c (82%) rename {src/sljit => deps/sljit/sljit_src}/sljitLir.h (91%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeARM_32.c (93%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeARM_64.c (94%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeARM_T2_32.c (94%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeLOONGARCH_64.c (93%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeMIPS_32.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeMIPS_64.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeMIPS_common.c (97%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativePPC_32.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativePPC_64.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativePPC_common.c (96%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeRISCV_32.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeRISCV_64.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeRISCV_common.c (78%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeS390X.c (96%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeX86_32.c (98%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeX86_64.c (98%) rename {src/sljit => deps/sljit/sljit_src}/sljitNativeX86_common.c (90%) rename {src/sljit => deps/sljit/sljit_src}/sljitSerialize.c (100%) rename {src/sljit => deps/sljit/sljit_src}/sljitUtils.c (100%) create mode 100644 doc/html/pcre2_set_optimize.html create mode 100644 doc/html/pcre2_set_substitute_case_callout.html delete mode 100644 
doc/index.html.src create mode 100644 doc/pcre2_set_optimize.3 create mode 100644 doc/pcre2_set_substitute_case_callout.3 mode change 100644 => 100755 ltmain.sh create mode 100644 src/pcre2_compile.h create mode 100644 src/pcre2_compile_class.c create mode 100644 src/pcre2_jit_char_inc.h create mode 100644 src/pcre2_util.h create mode 100644 testdata/grepinputBad8 create mode 100644 testdata/grepinputBad8_Trail create mode 100644 testdata/grepinputUN create mode 100644 testdata/testinput27 create mode 100644 testdata/testoutput27 diff --git a/132html b/132html deleted file mode 100755 index 05ab60e..0000000 --- a/132html +++ /dev/null @@ -1,317 +0,0 @@ -#! /usr/bin/perl -w - -# Script to turn PCRE2 man pages into HTML - - -# Subroutine to handle font changes and other escapes - -sub do_line { -my($s) = $_[0]; - -$s =~ s/ -$s =~ s/>/>/g; -$s =~ s"\\fI(.*?)\\f[RP]"$1"g; -$s =~ s"\\fB(.*?)\\f[RP]"$1"g; -$s =~ s"\\e"\\"g; -$s =~ s/(?<=Copyright )\(c\)/©/g; -$s; -} - -# Subroutine to ensure not in a paragraph - -sub end_para { -if ($inpara) - { - print TEMP "\n" if ($inpre); - print TEMP "

\n"; - } -$inpara = $inpre = 0; -$wrotetext = 0; -} - -# Subroutine to start a new paragraph - -sub new_para { -&end_para(); -print TEMP "

\n"; -$inpara = 1; -} - - -# Main program - -$innf = 0; -$inpara = 0; -$inpre = 0; -$wrotetext = 0; -$toc = 0; -$ref = 1; - -while ($#ARGV >= 0 && $ARGV[0] =~ /^-/) - { - $toc = 1 if $ARGV[0] eq "-toc"; - shift; - } - -# Initial output to STDOUT - -print < - -$ARGV[0] specification - - -

$ARGV[0] man page

-

-Return to the PCRE2 index page. -

-

-This page is part of the PCRE2 HTML documentation. It was generated -automatically from the original man page. If there is any nonsense in it, -please consult the man page, in case the conversion went wrong. -
-End - -print "

\n" if ($toc); - -# Copy the remainder to the standard output - -close(TEMP); -open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n"; - -print while (); - -print < -Return to the PCRE2 index page. -

-End - -close(TEMP); -unlink("/tmp/$$"); - -# End diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 9669f77..0000000 --- a/AUTHORS +++ /dev/null @@ -1,36 +0,0 @@ -THE MAIN PCRE2 LIBRARY CODE ---------------------------- - -Written by: Philip Hazel -Email local part: Philip.Hazel -Email domain: gmail.com - -Retired from University of Cambridge Computing Service, -Cambridge, England. - -Copyright (c) 1997-2024 University of Cambridge -All rights reserved - - -PCRE2 JUST-IN-TIME COMPILATION SUPPORT --------------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2010-2024 Zoltan Herczeg -All rights reserved. - - -STACK-LESS JUST-IN-TIME COMPILER --------------------------------- - -Written by: Zoltan Herczeg -Email local part: hzmester -Emain domain: freemail.hu - -Copyright(c) 2009-2024 Zoltan Herczeg -All rights reserved. - -#### diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..708fc23 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,200 @@ +PCRE2 Authorship and Contributors +================================= + +COPYRIGHT +--------- + +Please see the file [LICENCE](./LICENCE.md) in the PCRE2 distribution for +copyright details. + + +MAINTAINERS +----------- + +The PCRE and PCRE2 libraries were authored and maintained by Philip Hazel. + +Since 2024, the contributors with administrator access to the project are now +Nicholas Wilson and Zoltán Herczeg. See the file [SECURITY](./SECURITY.md) for +GPG keys. + +Both administrators are volunteers acting in a personal capacity. + + + + + + + + + + + + + + + + + + +
NameRole
+ + Nicholas Wilson
+ `nicholas@nicholaswilson.me.uk`
+ Currently of Microsoft Research Cambridge, UK + +
+ + * General project administration & maintenance + * Release management + * Code maintenance + +
+ + Zoltán Herczeg
+ `hzmester@freemail.hu`
+ Currently of the University of Szeged, Hungary + +
+ + * Code maintenance + * Ownership of `sljit` and PCRE2's JIT + +
+ + +CONTRIBUTORS +------------ + +Many others have participated and contributed to PCRE2 over its history. + +The maintainers are grateful for all contributions and participation over the +years. We apologise for any names we have forgotten. + +We are especially grateful to Philip Hazel, creator of PCRE and PCRE2, and +maintainer from 1997 to 2024. + +All names listed alphabetically. + +### Contributors to PCRE2 + +This list includes names up until the PCRE2 10.44 release. New names will be +added from the Git history on each release. + + Scott Bell + Carlo Marcelo Arenas Belón + Edward Betts + Jan-Willem Blokland + Ross Burton + Dmitry Cherniachenko + Alexey Chupahin + Jessica Clarke + Alejandro Colomar + Jeremie Courreges-Anglas + Addison Crump + Alex Dowad + Daniel Engberg + Daniel Richard G + David Gaussmann + Andrey Gorbachev + Jordan Griege + Jason Hood + Bumsu Hyeon + Roy Ivy + Martin Joerg + Guillem Jover + Ralf Junker + Ayesh Karunaratne + Michael Kaufmann + Yunho Kim + Joshua Kinard + David Korczynski + Uwe Korn + Jonas Kvinge + Kristian Larsson + Kai Lu + Behzod Mansurov + B. Scott Michel + Nathan Moinvaziri + Mike Munday + Marc Mutz + Fabio Pagani + Christian Persch + Tristan Ross + William A Rowe Jr + David Seifert + Yaakov Selkowitz + Rich Siegel + Karl Skomski + Maciej Sroczyński + Wolfgang Stöggl + Thomas Tempelmann + Greg Thain + Lucas Trzesniewski + Theodore Tsirpanis + Matthew Vernon + Rémi Verschelde + Thomas Voss + Ezekiel Warren + Carl Weaver + Chris Wilson + Amin Yahyaabadi + Joe Zhang + +### Contributors to PCRE1 + +These people contributed either by sending patches or reporting serious issues. 
+ + Irfan Adilovic + Alexander Barkov + Daniel Bergström + David Burgess + Ross Burton + David Byron + Fred Cox + Christian Ehrlicher + Tom Fortmann + Lionel Fourquaux + Mike Frysinger + Daniel Richard G + Dair Gran + "Graycode" (Red Hat Product Security) + Viktor Griph + Wen Guanxing + Robin Houston + Martin Jerabek + Peter Kankowski + Stephen Kelly + Yunho Kim + Joshua Kinard + Carsten Klein + Evgeny Kotkov + Ronald Landheer-Cieslak + Alan Lehotsky + Dmitry V. Levin + Nuno Lopes + Kai Lu + Giuseppe Maxia + Dan Mooney + Marc Mutz + Markus Oberhumer + Sheri Pierce + Petr Pisar + Ari Pollak + Bob Rossi + Ruiger Rill + Michael Shigorin + Rich Siegel + Craig Silverstein (C++ wrapper) + Karl Skomski + Paul Sokolovsky + Stan Switzer + Ian Taylor + Mark Tetrode + Jeff Trawick + Steven Van Ingelgem + Lawrence Velazquez + Jiong Wang + Stefan Weber + Chris Wilson + +Thanks go to Jeffrey Friedl for testing and debugging assistance. diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 0000000..c975ead --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,172 @@ +load("@bazel_skylib//rules:copy_file.bzl", "copy_file") +load("@bazel_skylib//rules:native_binary.bzl", "native_test") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") + +copy_file( + name = "config_h_generic", + src = "src/config.h.generic", + out = "src/config.h", +) + +copy_file( + name = "pcre2_h_generic", + src = "src/pcre2.h.generic", + out = "src/pcre2.h", +) + +copy_file( + name = "pcre2_chartables_c", + src = "src/pcre2_chartables.c.dist", + out = "src/pcre2_chartables.c", +) + +# Removed src/pcre2_ucptables.c below because it is #included in +# src/pcre2_tables.c. Also fixed typo: ckdint should be chkdint. +# PH, 22-March-2023. 
+cc_library( + name = "pcre2", + srcs = [ + "src/pcre2_auto_possess.c", + "src/pcre2_chkdint.c", + "src/pcre2_compile.c", + "src/pcre2_compile_class.c", + "src/pcre2_config.c", + "src/pcre2_context.c", + "src/pcre2_convert.c", + "src/pcre2_dfa_match.c", + "src/pcre2_error.c", + "src/pcre2_extuni.c", + "src/pcre2_find_bracket.c", + "src/pcre2_jit_compile.c", + "src/pcre2_maketables.c", + "src/pcre2_match.c", + "src/pcre2_match_data.c", + "src/pcre2_newline.c", + "src/pcre2_ord2utf.c", + "src/pcre2_pattern_info.c", + "src/pcre2_script_run.c", + "src/pcre2_serialize.c", + "src/pcre2_string_utils.c", + "src/pcre2_study.c", + "src/pcre2_substitute.c", + "src/pcre2_substring.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + "src/pcre2_xclass.c", + ":pcre2_chartables_c", + "src/pcre2_compile.h", + "src/pcre2_internal.h", + "src/pcre2_intmodedep.h", + "src/pcre2_ucp.h", + "src/pcre2_util.h", + ":config_h_generic", + ], + textual_hdrs = [ + "src/pcre2_jit_match.c", + "src/pcre2_jit_misc.c", + "src/pcre2_ucptables.c", + ], + hdrs = [ + ":pcre2_h_generic", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "PCRE2_CODE_UNIT_WIDTH=8", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + ], + includes = ["src"], + strip_include_prefix = "src", + visibility = ["//visibility:public"], +) + +cc_library( + name = "pcre2-posix", + srcs = [ + "src/pcre2posix.c", + ":config_h_generic", + ], + hdrs = [ + "src/pcre2posix.h", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "PCRE2_CODE_UNIT_WIDTH=8", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + ], + includes = ["src"], + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [":pcre2"], +) + +# Totally weird issue in Bazel. It won't let you #include any files unless they +# are declared to the build system. OK, fair enough. But - for a cc_binary it +# uses the file extension to determine whether it's a header or a compilation +# unit. But... 
we have several .c files which are #included, rather than treated +# as a compilation unit. +# +# For cc_library() above, we can overcome this with textual_hdrs. But that +# doesn't work for cc_binary(). Here's our workaround. +# +# https://github.com/bazelbuild/bazel/issues/680 +cc_library( + name = "pcre2test_dotc_headers", + hdrs = [ + "src/pcre2_chkdint.c", + "src/pcre2_printint.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + ], + strip_include_prefix = "src", + visibility = ["//visibility:private"], +) + +cc_binary( + name = "pcre2test", + srcs = [ + "src/pcre2test.c", + ":config_h_generic", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "HAVE_STRERROR", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + "SUPPORT_PCRE2_8", + ] + select({ + "@platforms//os:windows": [], + "//conditions:default": ["HAVE_UNISTD_H"], + }), + linkopts = select({ + "@platforms//os:windows": ["-STACK:2500000"], + "//conditions:default": [], + }), + visibility = ["//visibility:public"], + deps = [":pcre2test_dotc_headers", ":pcre2", ":pcre2-posix"], +) + +filegroup( + name = "testdata", + srcs = glob(["testdata/*"]), +) + +native_test( + name = "pcre2_test", + src = select({ + "@platforms//os:windows": "RunTest.bat", + "//conditions:default": "RunTest", + }), + out = select({ + "@platforms//os:windows": "RunTest.bat", + "//conditions:default": "RunTest", + }), + data = [":pcre2test", ":testdata"], + size = "small", +) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index b73ed73..9e91617 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,9 +103,16 @@ # 2023-01-15 Carlo added C99 as the minimum required # 2023-08-06 PH added support for setting variable length lookbehind maximum -# Increased minimum to 3.5 to workaround deprecated backward compatibility -# since 3.27. 
-cmake_minimum_required(VERSION 3.5 FATAL_ERROR) +################################################################################ +# We have used `gersemi` for auto-formatting our CMake files. +# Applied to all CMake files using: +# > pip3 install gersemi +# > gersemi --in-place --line-length 120 --indent 2 \ +# ./CMakeLists.txt ./cmake/*.cmake ./cmake/*.cmake.in +################################################################################ + +# Increased minimum to 3.15 to allow use of string(REPEAT). +cmake_minimum_required(VERSION 3.15 FATAL_ERROR) project(PCRE2 C) set(CMAKE_C_STANDARD 99) set(CMAKE_C_STANDARD_REQUIRED TRUE) @@ -126,478 +133,551 @@ cmake_policy(SET CMP0063 NEW) # on the command line. # SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src) +include_directories(${PROJECT_SOURCE_DIR}/src) # external packages -FIND_PACKAGE( BZip2 ) -FIND_PACKAGE( ZLIB ) -FIND_PACKAGE( Readline ) -FIND_PACKAGE( Editline ) +find_package(BZip2) +find_package(ZLIB) +find_package(Readline) +find_package(Editline) # Configuration checks -INCLUDE(CheckCSourceCompiles) -INCLUDE(CheckFunctionExists) -INCLUDE(CheckSymbolExists) -INCLUDE(CheckIncludeFile) -INCLUDE(CheckTypeSize) -INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR - -CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H) -CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H) -CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H) -CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H) -CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H) - -CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY) -CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) -CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE) -CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) -CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR) - -CHECK_C_SOURCE_COMPILES( - "#include - #include - int 
main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; }" +include(CheckCSourceCompiles) +include(CheckFunctionExists) +include(CheckSymbolExists) +include(CheckIncludeFile) +include(CheckTypeSize) +include(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR + +check_include_file(assert.h HAVE_ASSERT_H) +check_include_file(dirent.h HAVE_DIRENT_H) +check_include_file(sys/stat.h HAVE_SYS_STAT_H) +check_include_file(sys/types.h HAVE_SYS_TYPES_H) +check_include_file(unistd.h HAVE_UNISTD_H) +check_include_file(windows.h HAVE_WINDOWS_H) + +check_symbol_exists(bcopy "strings.h" HAVE_BCOPY) +check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) +check_symbol_exists(memmove "string.h" HAVE_MEMMOVE) +check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) +check_symbol_exists(strerror "string.h" HAVE_STRERROR) + +check_c_source_compiles( + [=[ + #include + #include + int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; } + ]=] HAVE_REALPATH ) set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) -set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") - -CHECK_C_SOURCE_COMPILES( - "#include - int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }" - HAVE_BUILTIN_MUL_OVERFLOW -) +if(NOT MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "XL") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") +endif() -CHECK_C_SOURCE_COMPILES( +check_c_source_compiles( "int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }" HAVE_ATTRIBUTE_UNINITIALIZED ) -CHECK_C_SOURCE_COMPILES([=[ +check_c_source_compiles( + [=[ extern __attribute__ ((visibility ("default"))) int f(void); int main(void) { return f(); } int f(void) { return 42; } - ]=] HAVE_VISIBILITY + ]=] + HAVE_VISIBILITY ) -if (HAVE_VISIBILITY) +set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS}) + +check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME) + +check_c_source_compiles( 
+ [=[ + #include + int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; } + ]=] + HAVE_BUILTIN_MUL_OVERFLOW +) + +check_c_source_compiles( + "int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }" + HAVE_BUILTIN_UNREACHABLE +) + +if(HAVE_VISIBILITY) set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=]) else() set(PCRE2_EXPORT) endif() -set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS}) - # Check whether Intel CET is enabled, and if so, adjust compiler flags. This # code was written by PH, trying to imitate the logic from the autotools # configuration. -CHECK_C_SOURCE_COMPILES( - "#ifndef __CET__ - #error CET is not enabled - #endif - int main() { return 0; }" +check_c_source_compiles( + [=[ + #ifndef __CET__ + #error CET is not enabled + #endif + int main() { return 0; } + ]=] INTEL_CET_ENABLED ) -IF (INTEL_CET_ENABLED) - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk") -ENDIF(INTEL_CET_ENABLED) +if(INTEL_CET_ENABLED) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk") +endif() # User-configurable options # # Note: CMakeSetup displays these in alphabetical order, regardless of # the order we use here. -SET(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.") +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.") + +option(BUILD_STATIC_LIBS "Build static libraries." ON) -OPTION(BUILD_STATIC_LIBS "Build static libraries." ON) +option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON) -OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON) +option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF) -OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF) +option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF) -OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF) +option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." 
OFF) -OPTION(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF) +set(PCRE2_DEBUG "IfDebugBuild" CACHE STRING "Include debugging code") +set_property(CACHE PCRE2_DEBUG PROPERTY STRINGS "IfDebugBuild" "ON" "OFF") -OPTION(PCRE2_DEBUG "Include debugging code" OFF) +option(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) -OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) +set( + PCRE2_EBCDIC + OFF + CACHE BOOL + "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)" +) -SET(PCRE2_EBCDIC OFF CACHE BOOL - "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)") +set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.") -SET(PCRE2_EBCDIC_NL25 OFF CACHE BOOL - "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.") +set( + PCRE2_LINK_SIZE + "2" + CACHE STRING + "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details." +) -SET(PCRE2_LINK_SIZE "2" CACHE STRING - "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.") +set( + PCRE2_PARENS_NEST_LIMIT + "250" + CACHE STRING + "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details." +) -SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING - "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.") +set( + PCRE2_HEAP_LIMIT + "20000000" + CACHE STRING + "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details." +) -SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING - "Default limit on heap memory (kibibytes). 
See HEAP_LIMIT in config.h.in for details.") +set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.") -SET(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING - "Default limit on variable lookbehinds.") +set( + PCRE2_MATCH_LIMIT + "10000000" + CACHE STRING + "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details." +) -SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING - "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.") +set( + PCRE2_MATCH_LIMIT_DEPTH + "MATCH_LIMIT" + CACHE STRING + "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details." +) -SET(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT" CACHE STRING - "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details.") +set( + PCRE2GREP_BUFSIZE + "20480" + CACHE STRING + "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details." +) -SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING - "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.") +set( + PCRE2GREP_MAX_BUFSIZE + "1048576" + CACHE STRING + "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details." +) -SET(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING - "Buffer maximum size parameter for pcre2grep. 
See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.") +set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).") -SET(PCRE2_NEWLINE "LF" CACHE STRING - "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).") +set(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL "Obsolete option: do not use") -SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL - "Obsolete option: do not use") +set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.") -SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL - "Enable support for Just-in-time compiling.") +if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).") +else() + set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE) +endif() -IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) - SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL - "Enable SELinux compatible execmem allocator in JIT (experimental).") -ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) - SET(PCRE2_SUPPORT_JIT_SEALLOC IGNORE) -ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) +set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.") -SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL - "Enable use of Just-in-time compiling in pcre2grep.") +set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.") -SET(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL - "Enable callout string support in pcre2grep.") +set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.") -SET(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL - "Enable callout string fork support in pcre2grep.") +set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") -SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL - "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") +set( + PCRE2_SUPPORT_BSR_ANYCRLF + OFF + CACHE BOOL + 
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks" +) -SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL - "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks") +set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.") -SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL - "If ON, backslash-C (upper case C) is locked out.") +set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.") -SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL - "Enable Valgrind support.") +option(PCRE2_SHOW_REPORT "Show the final configuration report" ON) +option(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON) +option(PCRE2_BUILD_TESTS "Build the tests" ON) -OPTION(PCRE2_SHOW_REPORT "Show the final configuration report" ON) -OPTION(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON) -OPTION(PCRE2_BUILD_TESTS "Build the tests" ON) +set( + PCRE2_INSTALL_CMAKEDIR + "${CMAKE_INSTALL_LIBDIR}/cmake/pcre2" + CACHE STRING + "Path used during CMake install for placing PCRE2's CMake config files, relative to the installation root (prefix)" +) -IF (MINGW) - OPTION(NON_STANDARD_LIB_PREFIX - "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." - OFF) +if(MINGW) + option( + NON_STANDARD_LIB_PREFIX + "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." + OFF + ) - OPTION(NON_STANDARD_LIB_SUFFIX - "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." - OFF) -ENDIF(MINGW) + option( + NON_STANDARD_LIB_SUFFIX + "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." + OFF + ) +endif() -IF(MSVC) - OPTION(PCRE2_STATIC_RUNTIME - "ON=Compile against the static runtime (/MT)." 
- OFF) - OPTION(INSTALL_MSVC_PDB - "ON=Install .pdb files built by MSVC, if generated" - OFF) -ENDIF(MSVC) +if(MSVC) + option(PCRE2_STATIC_RUNTIME "ON=Compile against the static runtime (/MT)." OFF) + option(INSTALL_MSVC_PDB "ON=Install .pdb files built by MSVC, if generated" OFF) +endif() # bzip2 lib -IF(BZIP2_FOUND) - OPTION (PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON) -ENDIF(BZIP2_FOUND) -IF(PCRE2_SUPPORT_LIBBZ2) - INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR}) -ENDIF(PCRE2_SUPPORT_LIBBZ2) +if(BZIP2_FOUND) + option(PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON) +endif() +if(PCRE2_SUPPORT_LIBBZ2) + include_directories(${BZIP2_INCLUDE_DIR}) +endif() # zlib -IF(ZLIB_FOUND) - OPTION (PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON) -ENDIF(ZLIB_FOUND) -IF(PCRE2_SUPPORT_LIBZ) - INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) -ENDIF(PCRE2_SUPPORT_LIBZ) +if(ZLIB_FOUND) + option(PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON) +endif() +if(PCRE2_SUPPORT_LIBZ) + include_directories(${ZLIB_INCLUDE_DIR}) +endif() # editline lib -IF(EDITLINE_FOUND) - OPTION (PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF) -ENDIF(EDITLINE_FOUND) -IF(EDITLINE_FOUND) - IF(PCRE2_SUPPORT_LIBEDIT) - INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR}) - ENDIF(PCRE2_SUPPORT_LIBEDIT) -ELSE(EDITLINE_FOUND) - IF(PCRE2_SUPPORT_LIBEDIT) - MESSAGE(FATAL_ERROR +if(EDITLINE_FOUND) + option(PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." 
OFF) +endif() +if(EDITLINE_FOUND) + if(PCRE2_SUPPORT_LIBEDIT) + include_directories(${EDITLINE_INCLUDE_DIR}) + endif() +else() + if(PCRE2_SUPPORT_LIBEDIT) + message( + FATAL_ERROR " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n" " or set Editline_ROOT to a full libedit installed tree, as needed\n" " Might need to enable policy CMP0074 in CMakeLists.txt" ) - ENDIF(PCRE2_SUPPORT_LIBEDIT) -ENDIF(EDITLINE_FOUND) + endif() +endif() # readline lib -IF(READLINE_FOUND) - OPTION (PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON) -ENDIF(READLINE_FOUND) -IF(PCRE2_SUPPORT_LIBREADLINE) - INCLUDE_DIRECTORIES(${READLINE_INCLUDE_DIR}) -ENDIF(PCRE2_SUPPORT_LIBREADLINE) +if(READLINE_FOUND) + option(PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." ON) +endif() +if(PCRE2_SUPPORT_LIBREADLINE) + include_directories(${READLINE_INCLUDE_DIR}) +endif() # Prepare build configuration -IF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) - MESSAGE(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.") -ENDIF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) - -IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) - MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled") -ENDIF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) - -IF(PCRE2_BUILD_PCRE2_8) - SET(SUPPORT_PCRE2_8 1) -ENDIF(PCRE2_BUILD_PCRE2_8) - -IF(PCRE2_BUILD_PCRE2_16) - SET(SUPPORT_PCRE2_16 1) -ENDIF(PCRE2_BUILD_PCRE2_16) - -IF(PCRE2_BUILD_PCRE2_32) - SET(SUPPORT_PCRE2_32 1) -ENDIF(PCRE2_BUILD_PCRE2_32) - -IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) - MESSAGE(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program") - SET(PCRE2_BUILD_PCRE2GREP OFF) -ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) - -IF(PCRE2_SUPPORT_LIBREADLINE AND 
PCRE2_SUPPORT_LIBEDIT) - IF(READLINE_FOUND) - MESSAGE(FATAL_ERROR - " Only one of the readline compatible libraries can be enabled.\n" - " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF" - ) - ENDIF(READLINE_FOUND) -ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) - -IF(PCRE2_SUPPORT_BSR_ANYCRLF) - SET(BSR_ANYCRLF 1) -ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF) - -IF(PCRE2_NEVER_BACKSLASH_C) - SET(NEVER_BACKSLASH_C 1) -ENDIF(PCRE2_NEVER_BACKSLASH_C) - -IF(PCRE2_SUPPORT_UNICODE) - SET(SUPPORT_UNICODE 1) -ENDIF(PCRE2_SUPPORT_UNICODE) - -IF(PCRE2_SUPPORT_JIT) - SET(SUPPORT_JIT 1) - IF(UNIX) - FIND_PACKAGE(Threads REQUIRED) - IF(CMAKE_USE_PTHREADS_INIT) - SET(REQUIRE_PTHREAD 1) - ENDIF(CMAKE_USE_PTHREADS_INIT) - ENDIF(UNIX) -ENDIF(PCRE2_SUPPORT_JIT) - -IF(PCRE2_SUPPORT_JIT_SEALLOC) - SET(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) - CHECK_SYMBOL_EXISTS(mkostemp stdlib.h REQUIRED) - UNSET(CMAKE_REQUIRED_DEFINITIONS) - IF(${REQUIRED}) - IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) - ADD_DEFINITIONS(-D_GNU_SOURCE) - SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1) - ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) - MESSAGE(FATAL_ERROR "Your configuration is not supported") - ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) - ELSE(${REQUIRED}) - SET(PCRE2_SUPPORT_JIT_SEALLOC OFF) - ENDIF(${REQUIRED}) -ENDIF(PCRE2_SUPPORT_JIT_SEALLOC) - -IF(PCRE2GREP_SUPPORT_JIT) - SET(SUPPORT_PCRE2GREP_JIT 1) -ENDIF(PCRE2GREP_SUPPORT_JIT) - -IF(PCRE2GREP_SUPPORT_CALLOUT) - SET(SUPPORT_PCRE2GREP_CALLOUT 1) - IF(PCRE2GREP_SUPPORT_CALLOUT_FORK) - SET(SUPPORT_PCRE2GREP_CALLOUT_FORK 1) - ENDIF(PCRE2GREP_SUPPORT_CALLOUT_FORK) -ENDIF(PCRE2GREP_SUPPORT_CALLOUT) - -IF(PCRE2_SUPPORT_VALGRIND) - SET(SUPPORT_VALGRIND 1) -ENDIF(PCRE2_SUPPORT_VALGRIND) - -IF(PCRE2_DISABLE_PERCENT_ZT) - SET(DISABLE_PERCENT_ZT 1) -ENDIF(PCRE2_DISABLE_PERCENT_ZT) +if(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) + message(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.") +endif() + 
+if(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) + message( + FATAL_ERROR + "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled" + ) +endif() + +if(PCRE2_BUILD_PCRE2_8) + set(SUPPORT_PCRE2_8 1) +endif() + +if(PCRE2_BUILD_PCRE2_16) + set(SUPPORT_PCRE2_16 1) +endif() + +if(PCRE2_BUILD_PCRE2_32) + set(SUPPORT_PCRE2_32 1) +endif() + +if(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) + message(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program") + set(PCRE2_BUILD_PCRE2GREP OFF) +endif() + +if(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) + if(READLINE_FOUND) + message( + FATAL_ERROR + " Only one of the readline compatible libraries can be enabled.\n" + " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF" + ) + endif() +endif() + +if(PCRE2_SUPPORT_BSR_ANYCRLF) + set(BSR_ANYCRLF 1) +endif() + +if(PCRE2_NEVER_BACKSLASH_C) + set(NEVER_BACKSLASH_C 1) +endif() + +if(PCRE2_SUPPORT_UNICODE) + set(SUPPORT_UNICODE 1) +endif() + +if(PCRE2_SUPPORT_JIT) + set(SUPPORT_JIT 1) + if(UNIX) + find_package(Threads REQUIRED) + if(CMAKE_USE_PTHREADS_INIT) + set(REQUIRE_PTHREAD 1) + endif() + endif() +endif() + +if(PCRE2_SUPPORT_JIT_SEALLOC) + set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) + check_symbol_exists(mkostemp stdlib.h REQUIRED) + unset(CMAKE_REQUIRED_DEFINITIONS) + if(${REQUIRED}) + if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + add_compile_definitions(_GNU_SOURCE) + set(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1) + else() + message(FATAL_ERROR "Your configuration is not supported") + endif() + else() + set(PCRE2_SUPPORT_JIT_SEALLOC OFF) + endif() +endif() + +if(PCRE2GREP_SUPPORT_JIT) + set(SUPPORT_PCRE2GREP_JIT 1) +endif() + +if(PCRE2GREP_SUPPORT_CALLOUT) + set(SUPPORT_PCRE2GREP_CALLOUT 1) + if(PCRE2GREP_SUPPORT_CALLOUT_FORK) + set(SUPPORT_PCRE2GREP_CALLOUT_FORK 1) + endif() +endif() + +if(PCRE2_SUPPORT_VALGRIND) + set(SUPPORT_VALGRIND 1) +endif() + 
+if(PCRE2_DISABLE_PERCENT_ZT) + set(DISABLE_PERCENT_ZT 1) +endif() # This next one used to reference ${READLINE_LIBRARY}) # but I was advised to add the NCURSES test as well, along with # some modifications to cmake/FindReadline.cmake which should # make it possible to override the default if necessary. PH -IF(PCRE2_SUPPORT_LIBREADLINE) - SET(SUPPORT_LIBREADLINE 1) - SET(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY}) -ENDIF(PCRE2_SUPPORT_LIBREADLINE) +if(PCRE2_SUPPORT_LIBREADLINE) + set(SUPPORT_LIBREADLINE 1) + set(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY}) +endif() # libedit is a plug-compatible alternative to libreadline -IF(PCRE2_SUPPORT_LIBEDIT) - SET(SUPPORT_LIBEDIT 1) - SET(PCRE2TEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY}) -ENDIF(PCRE2_SUPPORT_LIBEDIT) - -IF(PCRE2_SUPPORT_LIBZ) - SET(SUPPORT_LIBZ 1) - SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES}) -ENDIF(PCRE2_SUPPORT_LIBZ) - -IF(PCRE2_SUPPORT_LIBBZ2) - SET(SUPPORT_LIBBZ2 1) - SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES}) -ENDIF(PCRE2_SUPPORT_LIBBZ2) - -SET(NEWLINE_DEFAULT "") - -IF(PCRE2_NEWLINE STREQUAL "CR") - SET(NEWLINE_DEFAULT "1") -ENDIF(PCRE2_NEWLINE STREQUAL "CR") -IF(PCRE2_NEWLINE STREQUAL "LF") - SET(NEWLINE_DEFAULT "2") -ENDIF(PCRE2_NEWLINE STREQUAL "LF") -IF(PCRE2_NEWLINE STREQUAL "CRLF") - SET(NEWLINE_DEFAULT "3") -ENDIF(PCRE2_NEWLINE STREQUAL "CRLF") -IF(PCRE2_NEWLINE STREQUAL "ANY") - SET(NEWLINE_DEFAULT "4") -ENDIF(PCRE2_NEWLINE STREQUAL "ANY") -IF(PCRE2_NEWLINE STREQUAL "ANYCRLF") - SET(NEWLINE_DEFAULT "5") -ENDIF(PCRE2_NEWLINE STREQUAL "ANYCRLF") -IF(PCRE2_NEWLINE STREQUAL "NUL") - SET(NEWLINE_DEFAULT "6") -ENDIF(PCRE2_NEWLINE STREQUAL "NUL") - -IF(NEWLINE_DEFAULT STREQUAL "") - MESSAGE(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".") -ENDIF(NEWLINE_DEFAULT STREQUAL "") - -IF(PCRE2_EBCDIC) - SET(EBCDIC 1) -ENDIF(PCRE2_EBCDIC) - -IF(PCRE2_EBCDIC_NL25) - SET(EBCDIC 
1) - SET(EBCDIC_NL25 1) -ENDIF(PCRE2_EBCDIC_NL25) +if(PCRE2_SUPPORT_LIBEDIT) + set(SUPPORT_LIBEDIT 1) + set(PCRE2TEST_LIBS ${EDITLINE_LIBRARY}) +endif() + +if(PCRE2_SUPPORT_LIBZ) + set(SUPPORT_LIBZ 1) + set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES}) +endif() + +if(PCRE2_SUPPORT_LIBBZ2) + set(SUPPORT_LIBBZ2 1) + set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES}) +endif() + +set(NEWLINE_DEFAULT "") + +if(PCRE2_NEWLINE STREQUAL "CR") + set(NEWLINE_DEFAULT "1") +endif() +if(PCRE2_NEWLINE STREQUAL "LF") + set(NEWLINE_DEFAULT "2") +endif() +if(PCRE2_NEWLINE STREQUAL "CRLF") + set(NEWLINE_DEFAULT "3") +endif() +if(PCRE2_NEWLINE STREQUAL "ANY") + set(NEWLINE_DEFAULT "4") +endif() +if(PCRE2_NEWLINE STREQUAL "ANYCRLF") + set(NEWLINE_DEFAULT "5") +endif() +if(PCRE2_NEWLINE STREQUAL "NUL") + set(NEWLINE_DEFAULT "6") +endif() + +if(NEWLINE_DEFAULT STREQUAL "") + message( + FATAL_ERROR + "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\"." 
+ ) +endif() + +if(PCRE2_EBCDIC) + set(EBCDIC 1) +endif() + +if(PCRE2_EBCDIC_NL25) + set(EBCDIC 1) + set(EBCDIC_NL25 1) +endif() # Output files -CONFIGURE_FILE(config-cmake.h.in - ${PROJECT_BINARY_DIR}/config.h - @ONLY) +configure_file(config-cmake.h.in ${PROJECT_BINARY_DIR}/config.h @ONLY) # Parse version numbers and date out of configure.ac -file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac +file( + STRINGS + ${PROJECT_SOURCE_DIR}/configure.ac configure_lines - LIMIT_COUNT 50 # Read only the first 50 lines of the file + LIMIT_COUNT + 50 # Read only the first 50 lines of the file ) -set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date" - "libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version") +set( + SEARCHED_VARIABLES + "pcre2_major" + "pcre2_minor" + "pcre2_prerelease" + "pcre2_date" + "libpcre2_posix_version" + "libpcre2_8_version" + "libpcre2_16_version" + "libpcre2_32_version" +) foreach(configure_line ${configure_lines}) - foreach(_substitution_variable ${SEARCHED_VARIABLES}) - string(TOUPPER ${_substitution_variable} _substitution_variable_upper) - if (NOT ${_substitution_variable_upper}) - string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line}) - if (CMAKE_MATCH_1) - set(${_substitution_variable_upper} ${CMAKE_MATCH_1}) - endif() - endif() - endforeach() + foreach(substitution_variable ${SEARCHED_VARIABLES}) + string(TOUPPER ${substitution_variable} substitution_variable_upper) + if(NOT ${substitution_variable_upper}) + string(REGEX MATCH "m4_define\\(${substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line}) + if(CMAKE_MATCH_1) + set(${substitution_variable_upper} ${CMAKE_MATCH_1}) + endif() + endif() + endforeach() endforeach() -macro(PARSE_LIB_VERSION VARIABLE_PREFIX) - string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST ${${VARIABLE_PREFIX}_VERSION}) - list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 
${VARIABLE_PREFIX}_VERSION_CURRENT) - list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION) - list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE) - - math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}") - math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1") - math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1") - set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}}") - set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}") +macro(PARSE_LIB_VERSION variable_prefix) + string(REPLACE ":" ";" ${variable_prefix}_VERSION_LIST ${${variable_prefix}_VERSION}) + list(GET ${variable_prefix}_VERSION_LIST 0 ${variable_prefix}_VERSION_CURRENT) + list(GET ${variable_prefix}_VERSION_LIST 1 ${variable_prefix}_VERSION_REVISION) + list(GET ${variable_prefix}_VERSION_LIST 2 ${variable_prefix}_VERSION_AGE) + + math(EXPR ${variable_prefix}_SOVERSION "${${variable_prefix}_VERSION_CURRENT} - ${${variable_prefix}_VERSION_AGE}") + math(EXPR ${variable_prefix}_MACHO_COMPATIBILITY_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1") + math(EXPR ${variable_prefix}_MACHO_CURRENT_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1") + set( + ${variable_prefix}_MACHO_CURRENT_VERSION + "${${variable_prefix}_MACHO_CURRENT_VERSION}.${${variable_prefix}_VERSION_REVISION}}" + ) + set( + ${variable_prefix}_VERSION + "${${variable_prefix}_SOVERSION}.${${variable_prefix}_VERSION_AGE}.${${variable_prefix}_VERSION_REVISION}" + ) endmacro() -PARSE_LIB_VERSION(LIBPCRE2_POSIX) -PARSE_LIB_VERSION(LIBPCRE2_8) -PARSE_LIB_VERSION(LIBPCRE2_16) -PARSE_LIB_VERSION(LIBPCRE2_32) +parse_lib_version(LIBPCRE2_POSIX) +parse_lib_version(LIBPCRE2_8) 
+parse_lib_version(LIBPCRE2_16) +parse_lib_version(LIBPCRE2_32) -CONFIGURE_FILE(src/pcre2.h.in - ${PROJECT_BINARY_DIR}/pcre2.h - @ONLY) +configure_file(src/pcre2.h.in ${PROJECT_BINARY_DIR}/pcre2.h @ONLY) # Make sure to not link debug libs # against release libs and vice versa -IF(WIN32) - SET(CMAKE_DEBUG_POSTFIX "d") -ENDIF(WIN32) +if(WIN32) + set(CMAKE_DEBUG_POSTFIX "d") +endif() # Character table generation -OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF) -IF(PCRE2_REBUILD_CHARTABLES) - ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c) - ADD_CUSTOM_COMMAND( - COMMENT "Generating character tables (pcre2_chartables.c) for current locale" - DEPENDS pcre2_dftables +option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF) +if(PCRE2_REBUILD_CHARTABLES) + add_executable(pcre2_dftables src/pcre2_dftables.c) + add_custom_command( + OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c COMMAND pcre2_dftables - ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c - OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c + ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c + DEPENDS pcre2_dftables + COMMENT "Generating character tables (pcre2_chartables.c) for current locale" + VERBATIM ) -ELSE(PCRE2_REBUILD_CHARTABLES) - CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist - ${PROJECT_BINARY_DIR}/pcre2_chartables.c - COPYONLY) -ENDIF(PCRE2_REBUILD_CHARTABLES) +else() + configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY) +endif() # Source code -SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h) +set(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h) -SET(PCRE2_SOURCES +set( + PCRE2_SOURCES src/pcre2_auto_possess.c ${PROJECT_BINARY_DIR}/pcre2_chartables.c src/pcre2_chkdint.c src/pcre2_compile.c + src/pcre2_compile_class.c src/pcre2_config.c src/pcre2_context.c src/pcre2_convert.c @@ -624,284 +704,326 @@ SET(PCRE2_SOURCES src/pcre2_xclass.c ) -SET(PCRE2POSIX_HEADERS src/pcre2posix.h) -SET(PCRE2POSIX_SOURCES 
src/pcre2posix.c) +set(PCRE2POSIX_HEADERS src/pcre2posix.h) +set(PCRE2POSIX_SOURCES src/pcre2posix.c) -IF(MINGW AND BUILD_SHARED_LIBS) - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) - ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o - PRE-LINK - COMMAND windres ARGS pcre2.rc pcre2.o +if(MINGW AND BUILD_SHARED_LIBS) + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o PRE-LINK + COMMAND windres + ARGS pcre2.rc pcre2.o WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMENT Using pcre2 coff info in mingw build) - SET(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o) - ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) - - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) - ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o - PRE-LINK - COMMAND windres ARGS pcre2posix.rc pcre2posix.o + COMMENT "Using pcre2 coff info in mingw build" + ) + set(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o) + endif() + + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o PRE-LINK + COMMAND windres + ARGS pcre2posix.rc pcre2posix.o WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMENT Using pcre2posix coff info in mingw build) - SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o) - ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) -ENDIF(MINGW AND BUILD_SHARED_LIBS) - -IF(MSVC AND BUILD_SHARED_LIBS) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-posix.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb ${dll_pdb_debug_files}) - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) - SET(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc) - ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) - - IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) - SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc) - ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) -ENDIF(MSVC AND BUILD_SHARED_LIBS) + COMMENT "Using pcre2posix 
coff info in mingw build" + ) + set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o) + endif() +endif() + +if(MSVC AND BUILD_SHARED_LIBS) + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + set(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc) + endif() + + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc) + endif() +endif() # Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681 # This code was taken from the CMake wiki, not from WebM. -IF(MSVC AND PCRE2_STATIC_RUNTIME) - MESSAGE(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library") - foreach(flag_var - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) +if(MSVC AND PCRE2_STATIC_RUNTIME) + message(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library") + foreach( + flag_var + CMAKE_C_FLAGS + CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELWITHDEBINFO + ) string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") endforeach() -ENDIF(MSVC AND PCRE2_STATIC_RUNTIME) +endif() # Build setup -ADD_DEFINITIONS(-DHAVE_CONFIG_H) +add_compile_definitions(HAVE_CONFIG_H) -IF(MSVC) - ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS) -ENDIF(MSVC) +if(PCRE2_DEBUG STREQUAL "IfDebugBuild") + add_compile_definitions("$<$:PCRE2_DEBUG>") +elseif(PCRE2_DEBUG) + add_compile_definitions("PCRE2_DEBUG") +endif() + +if(MSVC) + add_compile_definitions(_CRT_SECURE_NO_DEPRECATE _CRT_SECURE_NO_WARNINGS) +endif() -SET(CMAKE_INCLUDE_CURRENT_DIR 1) +set(CMAKE_INCLUDE_CURRENT_DIR 1) -set(targets) +set(TARGETS) # 8-bit library -IF(PCRE2_BUILD_PCRE2_8) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES - COMPILE_DEFINITIONS 
PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_8_VERSION} - SOVERSION ${LIBPCRE2_8_SOVERSION}) - TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC) - TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR}) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - SET(targets ${targets} pcre2-8-static) - ADD_LIBRARY(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) - SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_POSIX_VERSION} - SOVERSION ${LIBPCRE2_POSIX_SOVERSION}) - TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static) - TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src) - set(targets ${targets} pcre2-posix-static) - - IF(MSVC) - SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static) - SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) - SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) - ENDIF(MSVC) - IF(PCRE2_STATIC_PIC) - SET_TARGET_PROPERTIES(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1) - ENDIF(PCRE2_STATIC_PIC) - ENDIF(BUILD_STATIC_LIBS) - - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION 
"${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_8_VERSION} - SOVERSION ${LIBPCRE2_8_SOVERSION} - OUTPUT_NAME pcre2-8) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-8-shared Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-8-shared) - - ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) - TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src) - SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_POSIX_VERSION} - SOVERSION ${LIBPCRE2_POSIX_SOVERSION} - OUTPUT_NAME pcre2-posix) +if(PCRE2_BUILD_PCRE2_8) + if(BUILD_STATIC_LIBS) + add_library(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + set_target_properties( + pcre2-8-static + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION} + ) + target_compile_definitions(pcre2-8-static PUBLIC PCRE2_STATIC) + target_include_directories(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR}) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-8-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-8-static) + add_library(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + set_target_properties( + pcre2-posix-static + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION} + 
) + target_link_libraries(pcre2-posix-static pcre2-8-static) + target_include_directories(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src) + set(TARGETS ${TARGETS} pcre2-posix-static) + + if(MSVC) + set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static) + set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static) + else() + set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) + set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() + + if(BUILD_SHARED_LIBS) + add_library(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-8-shared + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION} + OUTPUT_NAME pcre2-8 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-8-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-8-shared) + set(DLL_PDB_FILES $/pcre2-8.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $/pcre2-8d.pdb ${DLL_PDB_DEBUG_FILES}) + + add_library(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + target_include_directories(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src) + set_target_properties( + pcre2-posix-shared + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION} + OUTPUT_NAME pcre2-posix + ) set(PCRE2POSIX_CFLAG 
"-DPCRE2POSIX_SHARED") - TARGET_COMPILE_DEFINITIONS(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG}) - TARGET_LINK_LIBRARIES(pcre2-posix-shared pcre2-8-shared) - SET(targets ${targets} pcre2-posix-shared) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-8.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-8d.pdb ${dll_pdb_debug_files}) - - IF(MINGW) - IF(NON_STANDARD_LIB_PREFIX) - SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "") - ENDIF(NON_STANDARD_LIB_PREFIX) - IF(NON_STANDARD_LIB_SUFFIX) - SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll") - ENDIF(NON_STANDARD_LIB_SUFFIX) - ENDIF(MINGW) - ENDIF(BUILD_SHARED_LIBS) - - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-static) - ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-static) - ELSE(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-shared) - ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-shared) - ENDIF(BUILD_STATIC_LIBS) -ENDIF(PCRE2_BUILD_PCRE2_8) + target_compile_definitions(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG}) + target_link_libraries(pcre2-posix-shared pcre2-8-shared) + set(TARGETS ${TARGETS} pcre2-posix-shared) + set(DLL_PDB_FILES $/pcre2-posix.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $/pcre2-posixd.pdb ${DLL_PDB_DEBUG_FILES}) + + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() + + if(BUILD_STATIC_LIBS) + add_library(pcre2-8 ALIAS pcre2-8-static) + add_library(pcre2-posix ALIAS pcre2-posix-static) + else() + add_library(pcre2-8 ALIAS pcre2-8-shared) + add_library(pcre2-posix ALIAS pcre2-posix-shared) + endif() +endif() # 16-bit library -IF(PCRE2_BUILD_PCRE2_16) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} 
${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_16_VERSION} - SOVERSION ${LIBPCRE2_16_SOVERSION}) - TARGET_COMPILE_DEFINITIONS(pcre2-16-static PUBLIC PCRE2_STATIC) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-16-static Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-16-static) - - IF(MSVC) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) - ENDIF(MSVC) - IF(PCRE2_STATIC_PIC) - SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1) - ENDIF(PCRE2_STATIC_PIC) - ENDIF(BUILD_STATIC_LIBS) - - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_16_VERSION} - SOVERSION ${LIBPCRE2_16_SOVERSION} - OUTPUT_NAME pcre2-16) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-16-shared Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-16-shared) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-16.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-16d.pdb ${dll_pdb_debug_files}) - - IF(MINGW) - IF(NON_STANDARD_LIB_PREFIX) - SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES PREFIX "") - ENDIF(NON_STANDARD_LIB_PREFIX) - 
IF(NON_STANDARD_LIB_SUFFIX) - SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES SUFFIX "-0.dll") - ENDIF(NON_STANDARD_LIB_SUFFIX) - ENDIF(MINGW) - ENDIF(BUILD_SHARED_LIBS) - - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-static) - ELSE(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-shared) - ENDIF(BUILD_STATIC_LIBS) -ENDIF(PCRE2_BUILD_PCRE2_16) +if(PCRE2_BUILD_PCRE2_16) + if(BUILD_STATIC_LIBS) + add_library(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-16-static + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION} + ) + target_compile_definitions(pcre2-16-static PUBLIC PCRE2_STATIC) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-16-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-16-static) + + if(MSVC) + set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static) + else() + set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() + + if(BUILD_SHARED_LIBS) + add_library(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-16-shared + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION} + OUTPUT_NAME pcre2-16 + ) +
if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-16-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-16-shared) + set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16d.pdb ${DLL_PDB_DEBUG_FILES}) + + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-16-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-16-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() + + if(BUILD_STATIC_LIBS) + add_library(pcre2-16 ALIAS pcre2-16-static) + else() + add_library(pcre2-16 ALIAS pcre2-16-shared) + endif() +endif() # 32-bit library -IF(PCRE2_BUILD_PCRE2_32) - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR}) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_32_VERSION} - SOVERSION ${LIBPCRE2_32_SOVERSION}) - TARGET_COMPILE_DEFINITIONS(pcre2-32-static PUBLIC PCRE2_STATIC) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-32-static Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-32-static) - - IF(MSVC) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static) - ELSE(MSVC) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) - ENDIF(MSVC) - IF(PCRE2_STATIC_PIC) - SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1) - ENDIF(PCRE2_STATIC_PIC) - ENDIF(BUILD_STATIC_LIBS) - - IF(BUILD_SHARED_LIBS) - ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) - TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR}) -
SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES UNITY_BUILD OFF - COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 - MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" - MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" - VERSION ${LIBPCRE2_32_VERSION} - SOVERSION ${LIBPCRE2_32_SOVERSION} - OUTPUT_NAME pcre2-32) - IF(REQUIRE_PTHREAD) - TARGET_LINK_LIBRARIES(pcre2-32-shared Threads::Threads) - ENDIF(REQUIRE_PTHREAD) - set(targets ${targets} pcre2-32-shared) - SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-32.pdb ${dll_pdb_files}) - SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-32d.pdb ${dll_pdb_debug_files}) - - IF(MINGW) - IF(NON_STANDARD_LIB_PREFIX) - SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES PREFIX "") - ENDIF(NON_STANDARD_LIB_PREFIX) - IF(NON_STANDARD_LIB_SUFFIX) - SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES SUFFIX "-0.dll") - ENDIF(NON_STANDARD_LIB_SUFFIX) - ENDIF(MINGW) - ENDIF(BUILD_SHARED_LIBS) - - IF(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-static) - ELSE(BUILD_STATIC_LIBS) - ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-shared) - ENDIF(BUILD_STATIC_LIBS) -ENDIF(PCRE2_BUILD_PCRE2_32) +if(PCRE2_BUILD_PCRE2_32) + if(BUILD_STATIC_LIBS) + add_library(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-32-static + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION} + ) + target_compile_definitions(pcre2-32-static PUBLIC PCRE2_STATIC) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-32-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-32-static) + + if(MSVC) + set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME 
pcre2-32-static) + else() + set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() + + if(BUILD_SHARED_LIBS) + add_library(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-32-shared + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION} + OUTPUT_NAME pcre2-32 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-32-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-32-shared) + set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32d.pdb ${DLL_PDB_DEBUG_FILES}) + + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-32-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-32-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() + + if(BUILD_STATIC_LIBS) + add_library(pcre2-32 ALIAS pcre2-32-static) + else() + add_library(pcre2-32 ALIAS pcre2-32-shared) + endif() +endif() # Generate pkg-config files set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}") set(prefix ${CMAKE_INSTALL_PREFIX}) - set(exec_prefix "\${prefix}") set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") set(includedir "\${prefix}/include") @@ -939,295 +1061,343 @@ configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF) # Executables -IF(PCRE2_BUILD_PCRE2GREP) - ADD_EXECUTABLE(pcre2grep src/pcre2grep.c) - SET_PROPERTY(TARGET pcre2grep - PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) - set(targets ${targets} pcre2grep) -
TARGET_LINK_LIBRARIES(pcre2grep pcre2-posix ${PCRE2GREP_LIBS}) -ENDIF(PCRE2_BUILD_PCRE2GREP) +if(PCRE2_BUILD_PCRE2GREP) + add_executable(pcre2grep src/pcre2grep.c) + set_property(TARGET pcre2grep PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) + set(TARGETS ${TARGETS} pcre2grep) + target_link_libraries(pcre2grep pcre2-posix ${PCRE2GREP_LIBS}) +endif() # Testing -IF(PCRE2_BUILD_TESTS) - ENABLE_TESTING() +if(PCRE2_BUILD_TESTS) + enable_testing() - SET(PCRE2TEST_SOURCES src/pcre2test.c) + set(PCRE2TEST_SOURCES src/pcre2test.c) - IF(MSVC) + if(MSVC) # This is needed to avoid a stack overflow error in the standard tests. The # flag should be indicated with a forward-slash instead of a hyphen, but # then CMake treats it as a file path. - SET(PCRE2TEST_LINKER_FLAGS -STACK:2500000) - ENDIF(MSVC) - - ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES}) - set(targets ${targets} pcre2test) - IF(PCRE2_BUILD_PCRE2_8) - LIST(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8) - ENDIF(PCRE2_BUILD_PCRE2_8) - IF(PCRE2_BUILD_PCRE2_16) - LIST(APPEND PCRE2TEST_LIBS pcre2-16) - ENDIF(PCRE2_BUILD_PCRE2_16) - IF(PCRE2_BUILD_PCRE2_32) - LIST(APPEND PCRE2TEST_LIBS pcre2-32) - ENDIF(PCRE2_BUILD_PCRE2_32) - TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS}) - - IF(PCRE2_BUILD_PCRE2_8) - ADD_EXECUTABLE(pcre2posix_test src/pcre2posix_test.c) - TARGET_LINK_LIBRARIES(pcre2posix_test pcre2-posix pcre2-8) - ENDIF(PCRE2_BUILD_PCRE2_8) - - IF(PCRE2_SUPPORT_JIT) - ADD_EXECUTABLE(pcre2_jit_test src/pcre2_jit_test.c) - SET(PCRE2_JIT_TEST_LIBS) - IF(PCRE2_BUILD_PCRE2_8) - LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-8) - ENDIF(PCRE2_BUILD_PCRE2_8) - IF(PCRE2_BUILD_PCRE2_16) - LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-16) - ENDIF(PCRE2_BUILD_PCRE2_16) - IF(PCRE2_BUILD_PCRE2_32) - LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-32) - ENDIF(PCRE2_BUILD_PCRE2_32) - TARGET_LINK_LIBRARIES(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS}) - ENDIF(PCRE2_SUPPORT_JIT) - - # exes in Debug location tested by the RunTest and 
RunGrepTest shell scripts - # via "make test" - - # The commented out code below provokes a warning about future removal - # of the facility, and requires policy CMP0026 to be set to "OLD". I have - # got fed-up with the warnings, but my plea for help on the mailing list - # produced no response. So, I've hacked. The new code below seems to work on - # Linux. - -# IF(PCRE2_BUILD_PCRE2GREP) -# GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION) -# ENDIF(PCRE2_BUILD_PCRE2GREP) -# -# GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION) - - IF(PCRE2_BUILD_PCRE2GREP) - SET(PCRE2GREP_EXE $) - ENDIF(PCRE2_BUILD_PCRE2GREP) - - SET(PCRE2TEST_EXE $) - - -# ================================================= + set(PCRE2TEST_LINKER_FLAGS -STACK:2500000) + endif() + + add_executable(pcre2test ${PCRE2TEST_SOURCES}) + set(TARGETS ${TARGETS} pcre2test) + if(PCRE2_BUILD_PCRE2_8) + list(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8) + endif() + if(PCRE2_BUILD_PCRE2_16) + list(APPEND PCRE2TEST_LIBS pcre2-16) + endif() + if(PCRE2_BUILD_PCRE2_32) + list(APPEND PCRE2TEST_LIBS pcre2-32) + endif() + target_link_libraries(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS}) + + if(PCRE2_BUILD_PCRE2_8) + add_executable(pcre2posix_test src/pcre2posix_test.c) + target_link_libraries(pcre2posix_test pcre2-posix pcre2-8) + endif() + + if(PCRE2_SUPPORT_JIT) + add_executable(pcre2_jit_test src/pcre2_jit_test.c) + set(PCRE2_JIT_TEST_LIBS) + if(PCRE2_BUILD_PCRE2_8) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-8) + endif() + if(PCRE2_BUILD_PCRE2_16) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-16) + endif() + if(PCRE2_BUILD_PCRE2_32) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-32) + endif() + target_link_libraries(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS}) + endif() + + # ================================================= # Write out a CTest configuration file # - FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest - "# This is a generated file. 
+ file( + WRITE + ${PROJECT_BINARY_DIR}/CTestCustom.ctest + "# This is a generated file. MESSAGE(\"When testing is complete, review test output in the \\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\") MESSAGE(\" \") -") +" + ) - FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.sh - "#! /bin/sh + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_test.sh + "#! /bin/sh # This is a generated file. srcdir=${PROJECT_SOURCE_DIR} +pcre2test=${PROJECT_BINARY_DIR}/pcre2test +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test . ${PROJECT_SOURCE_DIR}/RunTest if test \"$?\" != \"0\"; then exit 1; fi # End -") +" + ) - IF(UNIX) - ADD_TEST(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh) - ENDIF(UNIX) + if(UNIX) + add_test(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh) + endif() - IF(PCRE2_BUILD_PCRE2GREP) - FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh - "#! /bin/sh + if(PCRE2_BUILD_PCRE2GREP) + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh + "#! /bin/sh # This is a generated file. srcdir=${PROJECT_SOURCE_DIR} +pcre2grep=${PROJECT_BINARY_DIR}/pcre2grep +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2grep=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2grep +pcre2test=${PROJECT_BINARY_DIR}/pcre2test +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test . 
${PROJECT_SOURCE_DIR}/RunGrepTest if test \"$?\" != \"0\"; then exit 1; fi # End -") +" + ) - IF(UNIX) - ADD_TEST(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) - ENDIF(UNIX) - ENDIF(PCRE2_BUILD_PCRE2GREP) + if(UNIX) + add_test(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + endif() + endif() - IF(WIN32) + if(WIN32) # Provide environment for executing the bat file version of RunTest - FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc) - FILE(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin) - FILE(TO_NATIVE_PATH ${PCRE2TEST_EXE} winexe) + file(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc) + file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin) - FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.bat - "\@REM This is a generated file. + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_test.bat + "\@REM This is a generated file. \@echo off setlocal SET srcdir=\"${winsrc}\" -# The next line was replaced by the following one after a user comment. -# SET pcre2test=\"${winexe}\" SET pcre2test=\"${winbin}\\pcre2test.exe\" if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" -call %srcdir%\\RunTest.Bat +call %srcdir%\\RunTest.bat if errorlevel 1 exit /b 1 echo RunTest.bat tests successfully completed -") +" + ) - ADD_TEST(NAME pcre2_test_bat - COMMAND pcre2_test.bat) - SET_TESTS_PROPERTIES(pcre2_test_bat PROPERTIES - PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed") + add_test(NAME pcre2_test_bat COMMAND pcre2_test.bat) + set_tests_properties(pcre2_test_bat PROPERTIES PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed") - IF("$ENV{OSTYPE}" STREQUAL "msys") + if(PCRE2_BUILD_PCRE2GREP) + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_grep_test.bat + "\@REM This is a generated file. 
+\@echo off +setlocal +SET srcdir=\"${winsrc}\" +SET pcre2test=\"${winbin}\\pcre2test.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" +SET pcre2grep=\"${winbin}\\pcre2grep.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2grep=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2grep.exe\" +call %srcdir%\\RunGrepTest.bat +if errorlevel 1 exit /b 1 +echo RunGrepTest.bat tests successfully completed +" + ) + + add_test(NAME pcre2_grep_test_bat COMMAND pcre2_grep_test.bat) + set_tests_properties( + pcre2_grep_test_bat + PROPERTIES PASS_REGULAR_EXPRESSION "RunGrepTest\\.bat tests successfully completed" + ) + endif() + + if("$ENV{OSTYPE}" STREQUAL "msys") # Both the sh and bat file versions of RunTest are run if make test is used # in msys - ADD_TEST(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh) - IF(PCRE2_BUILD_PCRE2GREP) - ADD_TEST(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) - ENDIF(PCRE2_BUILD_PCRE2GREP) - ENDIF("$ENV{OSTYPE}" STREQUAL "msys") - ENDIF(WIN32) + add_test(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh) + if(PCRE2_BUILD_PCRE2GREP) + add_test(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + endif() + endif() + endif() # Changed to accommodate testing whichever location was just built - IF(PCRE2_SUPPORT_JIT) - ADD_TEST(pcre2_jit_test pcre2_jit_test) - ENDIF(PCRE2_SUPPORT_JIT) - - IF(PCRE2_BUILD_PCRE2_8) - ADD_TEST(pcre2posix_test pcre2posix_test) - ENDIF(PCRE2_BUILD_PCRE2_8) + if(PCRE2_SUPPORT_JIT) + add_test(pcre2_jit_test pcre2_jit_test) + endif() -ENDIF(PCRE2_BUILD_TESTS) + if(PCRE2_BUILD_PCRE2_8) + add_test(pcre2posix_test pcre2posix_test) + endif() +endif() # Installation -SET(CMAKE_INSTALL_ALWAYS 1) +set(CMAKE_INSTALL_ALWAYS 1) -INSTALL(TARGETS ${targets} - RUNTIME DESTINATION bin - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -INSTALL(FILES ${pkg_config_files} DESTINATION 
${CMAKE_INSTALL_LIBDIR}/pkgconfig) -INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" +install( + TARGETS ${TARGETS} + RUNTIME DESTINATION bin + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +install(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" DESTINATION bin # Set 0755 permissions - PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE +) -INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include) +install(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include) # CMake config files. -set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in) +set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in) set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake) configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY) -set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in) +set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in) set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake) configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY) -install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION cmake) - -FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html) -FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1) -FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3) +install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION "${PCRE2_INSTALL_CMAKEDIR}") + +file(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html ${PROJECT_SOURCE_DIR}/doc/html/*.txt) +file( + GLOB txts + ${PROJECT_SOURCE_DIR}/doc/*.txt + AUTHORS.md + COPYING + ChangeLog + LICENCE.md + NEWS + 
README + SECURITY.md +) +file(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1) +file(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3) -INSTALL(FILES ${man1} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) -INSTALL(FILES ${man3} DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) -INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html) +install(FILES ${man1} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) +install(FILES ${man3} DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) +install(FILES ${txts} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2) +install(FILES ${html} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2/html) -IF(MSVC AND INSTALL_MSVC_PDB) - INSTALL(FILES ${dll_pdb_files} DESTINATION bin CONFIGURATIONS RelWithDebInfo) - INSTALL(FILES ${dll_pdb_debug_files} DESTINATION bin CONFIGURATIONS Debug) -ENDIF(MSVC AND INSTALL_MSVC_PDB) +if(MSVC AND INSTALL_MSVC_PDB) + install(FILES ${DLL_PDB_FILES} DESTINATION bin CONFIGURATIONS RelWithDebInfo) + install(FILES ${DLL_PDB_DEBUG_FILES} DESTINATION bin CONFIGURATIONS Debug) +endif() # Help, only for nice output -IF(BUILD_STATIC_LIBS) - SET(BUILD_STATIC_LIBS ON) -ELSE(BUILD_STATIC_LIBS) - SET(BUILD_STATIC_LIBS OFF) -ENDIF(BUILD_STATIC_LIBS) - -IF(PCRE2_HEAP_MATCH_RECURSE) - MESSAGE(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.") -ENDIF(PCRE2_HEAP_MATCH_RECURSE) - -IF(PCRE2_SHOW_REPORT) - STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype) - IF (CMAKE_C_FLAGS) - SET(cfsp " ") - ENDIF(CMAKE_C_FLAGS) - MESSAGE(STATUS "") - MESSAGE(STATUS "") - MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:") - MESSAGE(STATUS "") - MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}") - MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}") - MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}") - MESSAGE(STATUS "") - MESSAGE(STATUS " Build 8 bit PCRE2 library ....... 
: ${PCRE2_BUILD_PCRE2_8}") - MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_16}") - MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_32}") - MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}") - MESSAGE(STATUS " Use SELinux allocator in JIT .... : ${PCRE2_SUPPORT_JIT_SEALLOC}") - MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}") - MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}") - MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") - MESSAGE(STATUS " \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}") - MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}") - MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}") - MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}") - MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}") - MESSAGE(STATUS " Maximum variable lookbehind ..... : ${PCRE2_MAX_VARLOOKBEHIND}") - MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}") - MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}") - MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}") - MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}") - MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") - MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") - MESSAGE(STATUS " with PIC enabled ............. : ${PCRE2_STATIC_PIC}") - MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}") - MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2GREP_SUPPORT_JIT}") - MESSAGE(STATUS " Enable callouts in pcre2grep .... : ${PCRE2GREP_SUPPORT_CALLOUT}") - MESSAGE(STATUS " Enable callout fork in pcre2grep. 
: ${PCRE2GREP_SUPPORT_CALLOUT_FORK}") - MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}") - MESSAGE(STATUS " Build tests (implies pcre2test .. : ${PCRE2_BUILD_TESTS}") - MESSAGE(STATUS " and pcre2grep)") - IF(ZLIB_FOUND) - MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}") - ELSE(ZLIB_FOUND) - MESSAGE(STATUS " Link pcre2grep with libz ........ : Library not found" ) - ENDIF(ZLIB_FOUND) - IF(BZIP2_FOUND) - MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : ${PCRE2_SUPPORT_LIBBZ2}") - ELSE(BZIP2_FOUND) - MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : Library not found" ) - ENDIF(BZIP2_FOUND) - IF(EDITLINE_FOUND) - MESSAGE(STATUS " Link pcre2test with libeditline . : ${PCRE2_SUPPORT_LIBEDIT}") - ELSE(EDITLINE_FOUND) - MESSAGE(STATUS " Link pcre2test with libeditline . : Library not found" ) - ENDIF(EDITLINE_FOUND) - IF(READLINE_FOUND) - MESSAGE(STATUS " Link pcre2test with libreadline . : ${PCRE2_SUPPORT_LIBREADLINE}") - ELSE(READLINE_FOUND) - MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" ) - ENDIF(READLINE_FOUND) - MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}") - IF(PCRE2_DISABLE_PERCENT_ZT) - MESSAGE(STATUS " Use %zu and %td ..................: OFF" ) - ELSE(PCRE2_DISABLE_PERCENT_ZT) - MESSAGE(STATUS " Use %zu and %td ..................: AUTO" ) - ENDIF(PCRE2_DISABLE_PERCENT_ZT) - - IF(MINGW AND BUILD_SHARED_LIBS) - MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}") - MESSAGE(STATUS " Non-standard dll names (suffix) . 
: ${NON_STANDARD_LIB_SUFFIX}") - ENDIF(MINGW AND BUILD_SHARED_LIBS) - - IF(MSVC) - MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}") - ENDIF(MSVC) - - MESSAGE(STATUS "") -ENDIF(PCRE2_SHOW_REPORT) +if(BUILD_STATIC_LIBS) + set(BUILD_STATIC_LIBS ON) +else() + set(BUILD_STATIC_LIBS OFF) +endif() + +if(PCRE2_HEAP_MATCH_RECURSE) + message(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.") +endif() + +if(PCRE2_SHOW_REPORT) + message(STATUS "") + message(STATUS "") + message(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:") + message(STATUS "") + message(STATUS " Install prefix .................... : ${CMAKE_INSTALL_PREFIX}") + message(STATUS " C compiler ........................ : ${CMAKE_C_COMPILER}") + + if(CMAKE_C_FLAGS) + set(CFSP " ") + endif() + if(CMAKE_CONFIGURATION_TYPES) + foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES) + string(TOUPPER "${config}" buildtype) + string(LENGTH " (${config})" buildtypelen) + math(EXPR dotslen "18 - ${buildtypelen}") + string(REPEAT "." ${dotslen} dots) + message(STATUS " C compiler flags (${config}) ${dots} : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}") + endforeach() + else() + string(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype) + message(STATUS " C compiler flags .................. : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}") + endif() + + message(STATUS "") + if(CMAKE_CONFIGURATION_TYPES) + message(STATUS " Build configurations .............. : ${CMAKE_CONFIGURATION_TYPES}") + else() + message(STATUS " Build type ........................ : ${CMAKE_BUILD_TYPE}") + endif() + message(STATUS " Build 8 bit PCRE2 library ......... : ${PCRE2_BUILD_PCRE2_8}") + message(STATUS " Build 16 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_16}") + message(STATUS " Build 32 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_32}") + message(STATUS " Include debugging code ............ : ${PCRE2_DEBUG}") + message(STATUS " Enable JIT compiling support ...... 
: ${PCRE2_SUPPORT_JIT}") + message(STATUS " Use SELinux allocator in JIT ...... : ${PCRE2_SUPPORT_JIT_SEALLOC}") + message(STATUS " Enable Unicode support ............ : ${PCRE2_SUPPORT_UNICODE}") + message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}") + message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") + message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}") + message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}") + message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}") + message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}") + message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}") + message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}") + message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}") + message(STATUS " Heap limit ........................ : ${PCRE2_HEAP_LIMIT}") + message(STATUS " Match limit ....................... : ${PCRE2_MATCH_LIMIT}") + message(STATUS " Match depth limit ................. : ${PCRE2_MATCH_LIMIT_DEPTH}") + message(STATUS " Build shared libs ................. : ${BUILD_SHARED_LIBS}") + message(STATUS " Build static libs ................. : ${BUILD_STATIC_LIBS}") + message(STATUS " with PIC enabled ............... : ${PCRE2_STATIC_PIC}") + message(STATUS " Build pcre2grep ................... : ${PCRE2_BUILD_PCRE2GREP}") + message(STATUS " Enable JIT in pcre2grep ........... : ${PCRE2GREP_SUPPORT_JIT}") + message(STATUS " Enable callouts in pcre2grep ...... : ${PCRE2GREP_SUPPORT_CALLOUT}") + message(STATUS " Enable callout fork in pcre2grep .. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}") + message(STATUS " Buffer size for pcre2grep ......... : ${PCRE2GREP_BUFSIZE}") + message(STATUS " Build tests (implies pcre2test .... 
: ${PCRE2_BUILD_TESTS}") + message(STATUS " and pcre2grep)") + if(ZLIB_FOUND) + message(STATUS " Link pcre2grep with libz .......... : ${PCRE2_SUPPORT_LIBZ}") + else() + message(STATUS " Link pcre2grep with libz .......... : Library not found") + endif() + if(BZIP2_FOUND) + message(STATUS " Link pcre2grep with libbz2 ........ : ${PCRE2_SUPPORT_LIBBZ2}") + else() + message(STATUS " Link pcre2grep with libbz2 ........ : Library not found") + endif() + if(EDITLINE_FOUND) + message(STATUS " Link pcre2test with libeditline ... : ${PCRE2_SUPPORT_LIBEDIT}") + else() + message(STATUS " Link pcre2test with libeditline ... : Library not found") + endif() + if(READLINE_FOUND) + message(STATUS " Link pcre2test with libreadline ... : ${PCRE2_SUPPORT_LIBREADLINE}") + else() + message(STATUS " Link pcre2test with libreadline ... : Library not found") + endif() + message(STATUS " Support Valgrind .................. : ${PCRE2_SUPPORT_VALGRIND}") + if(PCRE2_DISABLE_PERCENT_ZT) + message(STATUS " Use %zu and %td ................... : OFF") + else() + message(STATUS " Use %zu and %td ................... : AUTO") + endif() + + if(MINGW AND BUILD_SHARED_LIBS) + message(STATUS " Non-standard dll names (prefix) ... : ${NON_STANDARD_LIB_PREFIX}") + message(STATUS " Non-standard dll names (suffix) ... : ${NON_STANDARD_LIB_SUFFIX}") + endif() + + if(MSVC) + message(STATUS " Install MSVC .pdb files ........... : ${INSTALL_MSVC_PDB}") + endif() + + message(STATUS "") +endif() # end CMakeLists.txt diff --git a/ChangeLog b/ChangeLog index ea228c1..685631c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,194 @@ Change Log for PCRE2 Before the move to GitHub, this was the only record of changes to PCRE2. Now there is also the log of commit messages. +Internal changes which are not visible to clients of the library are mostly not +listed here. + +Version 10.45-RC1 27-December-2024 +---------------------------------- + +1. 
(#418) Change 6 of 10.44 broke 32-bit tests because pcre2test's reporting of +memory size was changed to the entire compiled data block, instead of just the +pattern and tables data, so as to align with the new length restriction. +Because the block's header contains pointers, this meant the pcre2test output +was different in 32-bit mode. A patch by Carlo reverts to the previous state +and makes sure that any limit set by pcre2_set_max_pattern_compiled_length() +also avoids the internal struct overhead. + +2. (#416, #622) Updates to build.zig. + +3. (#427, et al.) Various fixes to pacify static analyzers. + +4. (#428) Add --posix-pattern-file to pcre2grep to allow processing of empty +patterns through the -f option, as well as patterns that end in space +characters, for compatibility with other grep tools. + +5. (4fa5b8bd) Fix a bug in the fuzz support quantifier-limiting code. It ignores +strings of more than 5 digits because they are necessarily numbers greater than +65535, the largest legal quantifier. However, it wasn't ignoring non-significant +leading zeros. + +6. (6d82f0cd) The case-independent processing of the letter-matching Unicode +properties Ll, Lt, and Lu has been changed to match Perl (which changed a while +ago). When caseless matching is in force, all three of these properties are now +treated as Lc (cased letter). + +7. (#433) The pcre2_jit_compile() function was updated by the addition of a new +option PCRE2_JIT_TEST_ALLOC which, if called with a NULL first argument, tests +not only the availability of JIT, but also its ability to allocate executable +memory. Update pcre2test to use this support to extend the -C option. + +8. (75b1025a) The code for parsing Unicode property descriptions for \p and \P +has been changed as follows: + + . White space etc. before ^ in a negated value such as \p{ ^L } was not being + ignored. + + . The code wouldn't have worked if PCRE2 was compiled for UTF-8 support + within an EBCDIC environment. 
Possibly nobody does this any more, but it + should now work. + + . The documentation of the syntax of what can follow \p and \P has been + updated. + +9. (1c24ba01) There was an error in the table of lengths for parsed items for +the OPTIONS item, but fortuitously it could never have actually bitten. While +fixing this, some other code that could never be obeyed was discovered and +removed. + +10. (674b6640) Removed some incorrect optimization code from DFA matching that +has been there since PCRE1, but has just been found to cause a no match return +instead of a partial match in some cases. It involves partial matching when (*F) +is present so is unlikely to have actually affected anyone. + +11. (b0f4ac17) Tidy the wording and formatting of some pcre2test error messages +concerned with bad modifiers. Also restrict single-letter modifier sequences to +the first item in a modifier list, as documented and always intended. + +12. (1415565c) An iterator at the end of many assertions can always be +auto-possessified, but not at the end of variable-length lookbehinds. There was +a bug in the code that checks for such a lookbehind; it was looking only at the +first branch, which is wrong because some branches can be fixed length when +others are not, for example (?<=AB|CD?). Now all branches are checked for +variability. + +13. (ead08288) Matching with pcre2_match() could give an incorrect result if a +variable-length lookbehind was used as the condition in a conditional group. +The condition could erroneously be treated as true if a branch matched but +overran the current position. This bug was in the interpreter only; matching +with JIT was correct. + +14. (#443) Split out the sljit sub-project into a "Git submodule". Git users +must now run `git submodule init; git submodule update` after a Git checkout, or +the build will fail due to missing files in deps/sljit. + +15. 
(#441) Add a new error code (PCRE2_ERROR_JIT_UNSUPPORTED) which is yielded +for unsupported jit features. + +16. (#444) Fix bug in 'first code unit' and 'last code unit' optimization +combined with lookahead assertions. + +17. (#445, #447, #449, #451, #452, #459, #563) Add a new feature called scan +substring. This feature is a new type of assertion which matches the content of +a capturing block to a sub-pattern. + +18. (#450) Improvements to 'first code unit' / 'starting code units' +optimisation. + +19. (#455) Many, many improvements to the JIT compiler. + +20. Item 43 of 10.43 was incomplete because it addressed only \z and not \Z, +which was still misbehaving when matching fragments inside invalid UTF strings. + +21. (d29e7290) Octal escapes of the form \045 or \111 were not being recognized +in substitution strings, and if encountered gave an error, though the \o{...} +form was recognized. This bug is now fixed. + +22. (#463, #487) Fix 1 byte out-of-bounds read when parsing malformed limits +(e.g. LIMIT_HEAP) + +23. Many improvements to test infrastructure. Many more platforms and +configurations are now run in Continuous Integration, and all the platforms now +run the full test suite, rather than a partial subset. + +24. (#475) Implement title casing in substitution strings using Perl syntax. + +25. (#478, #504) Disallow \x if not followed by { or a hex digit. + +26. (#473) Implements Python-style backrefs in substitutions. + +27. (#472) Fix error reporting for certain over-large octal escapes. + +28. (#482) Fix parsing of named captures in replacement strings, allowing +non-ASCII capture names to be used. + +29. (#477, #474, #488, #494, #496, #506, #508, #511, #518, #524, #540) Many +improvements to parsing and optimising of character classes. + +30. (#483, #498) Add support for \g<NAME> and $<NAME> to replacement strings. + +31. (#470) Add option flags PCRE2_EXTRA_NO_BS0 and PCRE2_EXTRA_PYTHON_OCTAL. + +32. 
(#471) Add new API function pcre2_set_optimize() for controlling which +optimizations are enabled. + +33. (#491) Adds $& $` $' and $_ to substitution replacements, as well as +interpreting \b and \v as characters. + +34. (#499) Add option PCRE2_EXTRA_NEVER_CALLOUT to disable callouts. + +35. (#503, #513) Update Unicode support to UCD 16. + +36. (#512, #618, #638) Add new function pcre2_set_substitute_case_callout() to +allow clients to provide a custom callback with locale-aware case +transformation. + +37. (#516) Fix case-insensitive matching of backreferences when using the +PCRE2_EXTRA_CASELESS_RESTRICT option. + +38. (#519) In pcre2grep, add $& as an alias for $0 + +39. (c9bf8339, #534) Updated perltest.sh to enable locale setting. + +40. (#521) Add support for Turkish I casefolding, using new options +PCRE2_EXTRA_TURKISH_CASING, and added pre-pattern flags (*TURKISH_CASING) and +(*CASELESS_RESTRICT). + +41. (#523, #546, #547) Add support for UTS#18 compatible character classes, +using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a metacharacter +within character classes and the operators '&&', '--' and '~~', allowing +subtractions and intersections of character classes to be easily expressed. + +42. (#553, #586, #596, #597) Add support for Perl-style extended character +classes, using the syntax (?[...]). This also allows expressing subtractions and +intersections of character classes, but using a different syntax to UTS#18. + +43. (#554) Fixed a bug in JIT affecting greedy bounded repeats. The upper limit +of repeats inside a repeated bracket might be incorrectly checked. + +44. (#556) Fixed a bug in JIT affecting caseful matching of backreferences. When +utf is disabled, and dupnames is enabled, caseless matching was used even +if caseful matching was needed. + +45. (f34fc0a3) Fixed a bug in pcre2grep reported by Alejandro Colomar + (GitHub issue #577). 
In certain cases, when lines of above and +below context were contiguous, a separator line was incorrectly being inserted. + +46. (#594) Fix a small (one/two byte) out-of-bounds read on invalid UTF-8 input +in pcre2grep. + +47. (#370) Fix the INSTALL_MSVC_PDB CMake flag. + +48. (#366) Install cmake files in prefix/lib/cmake/pcre2 rather than +prefix/cmake. The new CMake flag PCRE2_INSTALL_CMAKEDIR allows customising this +location. + +49. (#624, #626, #628, #632, #639, #641) Reduce code size of generated JIT code +for repeated character classes. + +50. (#623) Update the Bazel build files. + + Version 10.44 07-June-2024 -------------------------- diff --git a/CheckMan b/CheckMan deleted file mode 100755 index 2f84f99..0000000 --- a/CheckMan +++ /dev/null @@ -1,78 +0,0 @@ -#! /usr/bin/perl - -# A script to scan PCRE2's man pages to check for typos in the control -# sequences. I use only a small set of the available repertoire, so it is -# straightforward to check that nothing else has slipped in by mistake. This -# script should be called in the doc directory. 
- -$yield = 0; - -while (scalar(@ARGV) > 0) - { - $line = 0; - $file = shift @ARGV; - - open (IN, $file) || die "Failed to open $file\n"; - - while () - { - $count = 0; - $line++; - if (/^\s*$/) - { - printf "Empty line $line of $file\n"; - $yield = 1; - } - elsif (/^\./) - { - if (!/^\.\s*$| - ^\.B\s+\S| - ^\.TH\s\S| - ^\.SH\s\S| - ^\.SS\s\S| - ^\.TP(?:\s?\d+)?\s*$| - ^\.SM\s*$| - ^\.br\s*$| - ^\.rs\s*$| - ^\.sp\s*$| - ^\.nf\s*$| - ^\.fi\s*$| - ^\.P\s*$| - ^\.PP\s*$| - ^\.\\"(?:\ HREF)?\s*$| - ^\.\\"\sHTML\s\s*$| - ^\.\\"\sHTML\s<\/a>\s*$| - ^\.\\"\s<\/a>\s*$| - ^\.\\"\sJOINSH\s*$| - ^\.\\"\sJOIN\s*$/x - ) - { - printf "Bad control line $line of $file\n"; - $yield = 1; - } - } - elsif (/\\[^ef]|\\f[^IBP]/) - { - printf "Bad backslash in line $line of $file\n"; - $yield = 1; - } - while (/\\f[BI]/g) - { - $count++; - } - while (/\\fP/g) - { - $count--; - } - if ($count != 0) - { - printf "Mismatching formatting in line $line of $file\n"; - $yield = 1; - } - } - - close(IN); - } - -exit $yield; -# End diff --git a/CleanTxt b/CleanTxt deleted file mode 100755 index 1f42519..0000000 --- a/CleanTxt +++ /dev/null @@ -1,113 +0,0 @@ -#! /usr/bin/perl -w - -# Script to take the output of nroff -man and remove all the backspacing and -# the page footers and the screen commands etc so that it is more usefully -# readable online. In fact, in the latest nroff, intermediate footers don't -# seem to be generated any more. - -$blankcount = 0; -$lastwascut = 0; -$firstheader = 1; - -# Input on STDIN; output to STDOUT. - -while () - { - s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" - s/.\x8//g; # Remove "char, backspace" - - # Handle header lines. Retain only the first one we encounter, but remove - # the blank line that follows. Any others (e.g. at end of document) and the - # following blank line are dropped. 
- - if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) - { - if ($firstheader) - { - $firstheader = 0; - print; - $lastprinted = $_; - $lastwascut = 0; - } - $_=; # Remove a blank that follows - next; - } - - # Count runs of empty lines - - if (/^\s*$/) - { - $blankcount++; - $lastwascut = 0; - next; - } - - # If a chunk of lines has been cut out (page footer) and the next line - # has a different indentation, put back one blank line. - - if ($lastwascut && $blankcount < 1 && defined($lastprinted)) - { - ($a) = $lastprinted =~ /^(\s*)/; - ($b) = $_ =~ /^(\s*)/; - $blankcount++ if ($a ne $b); - } - - # We get here only when we have a non-blank line in hand. If it was preceded - # by 3 or more blank lines, read the next 3 lines and see if they are blank. - # If so, remove all 7 lines, and remember that we have just done a cut. - - if ($blankcount >= 3) - { - for ($i = 0; $i < 3; $i++) - { - $next[$i] = ; - $next[$i] = "" if !defined $next[$i]; - $next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" - $next[$i] =~ s/.\x8//g; # Remove "char, backspace" - } - - # Cut out chunks of the form <3 blanks><3 blanks> - - if ($next[0] =~ /^\s*$/ && - $next[1] =~ /^\s*$/ && - $next[2] =~ /^\s*$/) - { - $blankcount -= 3; - $lastwascut = 1; - } - - # Otherwise output the saved blanks, the current, and the next three - # lines. Remember the last printed line. - - else - { - for ($i = 0; $i < $blankcount; $i++) { print "\n"; } - print; - for ($i = 0; $i < 3; $i++) - { - $next[$i] =~ s/.\x8//g; - print $next[$i]; - $lastprinted = $_; - } - $lastwascut = 0; - $blankcount = 0; - } - } - - # This non-blank line is not preceded by 3 or more blank lines. Output - # any blanks there are, and the line. Remember it. Force two blank lines - # before headings. 
- - else - { - $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ && - defined($lastprinted); - for ($i = 0; $i < $blankcount; $i++) { print "\n"; } - print; - $lastprinted = $_; - $lastwascut = 0; - $blankcount = 0; - } - } - -# End diff --git a/Detrail b/Detrail deleted file mode 100755 index 1c5c7e9..0000000 --- a/Detrail +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/perl - -# This is a script for removing trailing whitespace from lines in files that -# are listed on the command line. - -# This subroutine does the work for one file. - -sub detrail { -my($file) = $_[0]; -my($changed) = 0; -open(IN, "$file") || die "Can't open $file for input"; -@lines = ; -close(IN); -foreach (@lines) - { - if (/\s+\n$/) - { - s/\s+\n$/\n/; - $changed = 1; - } - } -if ($changed) - { - open(OUT, ">$file") || die "Can't open $file for output"; - print OUT @lines; - close(OUT); - } -} - -# This is the main program - -$, = ""; # Output field separator -for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); } - -# End diff --git a/HACKING b/HACKING index b9fa81c..65de9e3 100644 --- a/HACKING +++ b/HACKING @@ -21,41 +21,27 @@ form, and were quite restricted in what they could do by comparison with Perl. The interesting part about the algorithm was that the amount of space required to hold the compiled form of an expression was known in advance. The code to apply an expression did not operate by backtracking, as the original Henry -Spencer code and current PCRE2 and Perl code does, but instead checked all -possibilities simultaneously by keeping a list of current states and checking -all of them as it advanced through the subject string. In the terminology of -Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a -traditional Finite State Machine (FSM). When the pattern was all used up, all -remaining states were possible matches, and the one matching the longest subset -of the subject string was chosen. 
This did not necessarily maximize the -individual wild portions of the pattern, as is expected in Unix and Perl-style -regular expressions. +Spencer code and the current PCRE2 pcre2_match() function and Perl code do, but +instead checked all possibilities simultaneously by keeping a list of current +states and checking all of them as it advanced through the subject string. In +the terminology of Jeffrey Friedl's book, it was a "DFA algorithm", though it +was not a traditional Finite State Machine (FSM). When the pattern was all used +up, all remaining states were possible matches, and the one matching the +longest subset of the subject string was chosen. This did not necessarily +maximize the individual wild portions of the pattern, as is expected in Unix +and Perl-style regular expressions. Historical note 2 ----------------- -By contrast, the code originally written by Henry Spencer (which was -subsequently heavily modified for Perl) compiles the expression twice: once in -a dummy mode in order to find out how much store will be needed, and then for -real. (The Perl version may or may not still do this; I'm talking about the -original library.) The execution function operates by backtracking and -maximizing (or, optionally, minimizing, in Perl) the amount of the subject that -matches individual wild portions of the pattern. This is an "NFA algorithm" in -Friedl's terminology. - - -OK, here's the real stuff -------------------------- - -For the set of functions that formed the original PCRE1 library in 1997 (which -are unrelated to those mentioned above), I tried at first to invent an -algorithm that used an amount of store bounded by a multiple of the number of -characters in the pattern, to save on compiling time. However, because of the -greater complexity in Perl regular expressions, I couldn't do this, even though -the then current Perl 5.004 patterns were much simpler than those supported -nowadays. 
In any case, a first pass through the pattern is helpful for other -reasons. +The code originally written by Henry Spencer (which was subsequently heavily +modified for Perl) compiles the expression twice: once in a dummy mode in order +to find out how much store will be needed, and then for real. (The Perl version +may or may not still do this; I'm talking about the original library.) The +execution function operates by backtracking and maximizing (or, optionally, +minimizing, in Perl) the amount of the subject that matches individual wild +portions of the pattern. This is an "NFA algorithm" in Friedl's terminology. Support for 16-bit and 32-bit data strings @@ -98,8 +84,8 @@ were also present in the 7.0 release). A side effect of this work was that the previous limit of 200 on the nesting depth of parentheses was removed. However, there was a downside: compiling ran more slowly than before (30% or more, depending on the pattern) because it now -did a full analysis of the pattern. My hope was that this would not be a big -issue, and in the event, nobody has commented on it. +did a full analysis of the pattern twice. My hope was that this would not be a +big issue, and in the event, nobody has commented on it. At release 8.34, a limit on the nesting depth of parentheses was re-introduced (default 250, settable at build time) so as to put a limit on the amount of @@ -119,7 +105,7 @@ memory.) The use of duplicate group numbers (the (?| facility) also caused issues. To get around these problems I adopted a new approach by adding a third pass -over the pattern (really a "pre-pass"), which did nothing other than identify +over the pattern (really a "pre-pass"), which does nothing other than identify all the named subpatterns and their corresponding group numbers. This means that the actual compile (both the memory-computing dummy run and the real compile) has full knowledge of group names and numbers throughout. 
Several @@ -154,17 +140,21 @@ assumption is made that there will be a callout for each pattern code unit at the end. A default parsed pattern vector is defined on the system stack, to minimize memory handling, but if this is not big enough, heap memory is used. -As before, the actual compiling function is run twice, the first time to -determine the amount of memory needed for the final compiled pattern. It -now processes the parsed pattern vector, not the pattern itself, although some -of the parsed items refer to strings in the pattern - for example, group -names. As escapes and comments have already been processed, the code is a bit -simpler than before. +If there are any lookbehinds in the pattern, the parsed pattern is scanned in +order to work out their lengths. Then the actual compiling function is run +twice, the first time to determine the amount of memory needed for the final +compiled pattern. The compiling function processes the parsed pattern vector, +not the pattern itself, although some of the parsed items refer to strings in +the pattern - for example, group names. -Most errors can be diagnosed during the parsing scan. For those that cannot -(for example, "lookbehind assertion is not fixed length"), the parsed code -contains offsets into the pattern so that the actual compiling code can -report where errors are. +Some post-processing of the compiled pattern takes place. If there are any +recursion or subroutine calls, there is a scan to convert them into offsets. +Then there are other scans to apply certain optimizations, some of which can be +disabled by setting appropriate options. + +Most errors can be diagnosed during the parsing scan. For those that cannot, +the parsed code contains offsets into the pattern so that the actual compiling +code can report where the errors are. 
The elements of the parsed pattern vector @@ -209,6 +199,11 @@ META_RANGE_ESCAPED hyphen in class range with at least one escape META_RANGE_LITERAL hyphen in class range defined literally META_SKIP (*SKIP) - no argument (see below for with argument) META_THEN (*THEN) - no argument (see below for with argument) +META_ECLASS_AND && (or &) in an extended character class +META_ECLASS_OR || (or |, +) in an extended character class +META_ECLASS_SUB -- (or -) in an extended character class +META_ECLASS_XOR ~~ (or ^) in an extended character class +META_ECLASS_NOT ! in an extended character class The two RANGE values occur only in character classes. They are positioned between two literals that define the start and end of the range. In an EBCDIC @@ -240,11 +235,11 @@ occurrence is useful). On 64-bit systems this avoids using more than two parsed pattern elements for items such as \3. The offset is used when an error occurs because the reference is to a non-existent group. -META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next -element contains the 16-bit type and data property values, packed together. -ESC_g and ESC_k are used only for named references - numerical ones are turned -into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed -by a length and an offset into the pattern to specify the name. +META_ESCAPE is used for escapes such as \d that match a character. It has an +ESC_xxx value as its data. For ESC_P and ESC_p, the next element contains the +16-bit type and data property values, packed together. Escape sequences such as +\g and \k are turned into other items like META_RECURSE or META_BACKREF and +their ESC_xxx values never occur with META_ESCAPE. 
The following have one data item that follows in the next vector element: @@ -268,15 +263,17 @@ META_COND_NAME (?() or (?('name') or (?(name) META_COND_RNAME (?(R&name) META_COND_RNUMBER (?(Rdigits) META_RECURSE_BYNAME (?&name) -META_BACKREF_BYNAME \k'name' +META_BACKREF_BYNAME \k'name' or \k<name> or \k{name} or \g{name} +META_SCS_NAME (*scs:(<name>)...) META_COND_RNUMBER is used for names that start with R and continue with digits, because this is an ambiguous case. It could be a back reference to a group with that name, or it could be a recursion test on a numbered group. -This one is followed by an offset, for use in error messages, then a number: +These are followed by an offset, for use in error messages, then a number: META_COND_NUMBER (?([+-]digits) +META_SCS_NUMBER (*scs:(digits)...) The following is followed just by an offset, for use in error messages: @@ -286,7 +283,7 @@ The following are at first also followed just by an offset for use in error messages. After the lengths of the branches of a lookbehind group have been checked the error offset is no longer needed. The lower 16 bits of the main word are now set to the maximum length of the first branch of the lookbehind -group, and the second word is set to the mimimum matching length for a +group, and the second word is set to the minimum matching length for a variable-length lookbehind group, or to LOOKBEHIND_MAX for a group whose branches are all of fixed length. These values are used when generating OP_REVERSE or OP_VREVERSE for the first branch. The miminum value is also used @@ -336,16 +333,28 @@ will use most of the time. If PCRE2 is compiled with just-in-time (JIT) support, and studying a compiled pattern with JIT is successful, the JIT code is run instead of the normal pcre2_match() code, but the result is the same. +The interpreter used to implement backtracking by means of recursive function +calls, but this gave rise to regular complaints when patterns with large search +trees ran out of stack. 
There was for a while a fudge that used the heap +instead, but this was inefficient and slow. In 2017 I re-wrote pcre2_match() as +a single, non-recursive function that implements backtracking via a vector of +"frames" on the heap, each frame representing a backtracking point. As well as +standard information such as the position in the pattern and position in the +subject, each frame has a number of unassigned variables that can be used +locally to preserve values at a backtracking point. C macros are used +extensively to implement all of this. + Supplementary matching function ------------------------------- -There is also a supplementary matching function called pcre2_dfa_match(). This +There is a supplementary matching function called pcre2_dfa_match() that implements a DFA matching algorithm that searches simultaneously for all possible matches that start at one point in the subject string. (Going back to my roots: see Historical Note 1 above.) This function intreprets the same compiled pattern data as pcre2_match(); however, not all the facilities are -available, and those that are do not always work in quite the same way. See the +available, and those that are do not always work in quite the same way. In +particular, capturing parentheses and backreferences are not supported. See the user documentation for details. The algorithm that is used for pcre2_dfa_match() is not a traditional FSM, @@ -361,8 +370,10 @@ Changeable options The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and some others may be changed in the middle of patterns by items such as (?i). Their processing is handled entirely at compile time by generating different -opcodes for the different settings. The runtime functions do not need to keep -track of an option's state. +opcodes for the different settings. Some options are copied into the opcode's +data, for opcodes such as OP_REFI which depends on the (?r) +(PCRE2_EXTRA_CASELESS_RESTRICT) option. 
The runtime functions do not need to +keep track of an option's state. PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE are tracked and processed during the parsing pre-pass. The others are handled @@ -383,10 +394,10 @@ within the compiled pattern. LINK_SIZE always specifies a number of bytes. The default value for LINK_SIZE is 2, except for the 32-bit library, where it can only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values, and the 16-bit library can be compiled to use 4-byte values, though this -impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is -necessary only when patterns whose compiled length is greater than 65535 code -units are going to be processed. When a LINK_SIZE value uses more than one code -unit, the most significant unit is first. +impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries +is necessary only when patterns whose compiled length is greater than 65535 +code units are going to be processed. When a LINK_SIZE value uses more than one +code unit, the most significant unit is first. In this description, we assume the "normal" compilation options. Data values that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode @@ -396,7 +407,7 @@ that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode Opcodes with no following data ------------------------------ -These items are all just one unit long: +These items are all just one code unit long: OP_END end of pattern OP_ANY match any one character other than newline @@ -594,9 +605,13 @@ do. For classes containing characters with values greater than 255 or that contain \p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable -code points are less than 256, followed by a list of pairs (for a range) and/or -single characters and/or properties. In caseless mode, all equivalent -characters are explicitly listed. +code points are less than 256. 
After the bit map, the properties of the +character class are listed, if they are present. The items in the list +follow the declaration order of the pattern string. The property list +is followed by single characters and/or character ranges, if they are +present. The characters/ranges are sorted in ascending order, and at +least one non-matching character must be present between any two of +them. In caseless mode, all equivalent characters are explicitly listed. OP_XCLASS is followed by a LINK_SIZE value containing the total length of the opcode and its data. This is followed by a code unit containing flag bits: @@ -618,6 +633,42 @@ When XCL_NOT is set, the bit map, if present, contains bits for characters that are allowed (exactly as for OP_NCLASS), but the list of items that follow it specifies characters and properties that are not allowed. +The meaning of the bitmap indicated by XCL_MAP is that, if one is present, then +it fully describes which code points < 256 match the class (without needing to +invert the check according to XCL_NOT); the other items in the OP_XCLASS need +not be consulted. However, if a bitmap is not present, then code points < 256 +may still match, so the other items in the OP_XCLASS must be consulted. + +For classes containing logical expressions, such as "[\p{Greek} && \p{Lu}]" for +"uppercase Greek letters", OP_ECLASS is used. The expression is encoded as a +stack-based series of operands and operators, in Reverse Polish Notation. Like +an OP_XCLASS, the OP_ECLASS is first followed by a LINK_SIZE value containing +the total length of the opcode and its data. That is followed by a code unit +containing flags: currently just ECL_MAP indicating that a bit map is present. +There follows the bit map, if ECL_MAP is set. Finally, there is a sequence of +items that are either an operand or operator. 
Each item starts with a single +code unit containing its type: + + ECL_AND AND; no additional data + ECL_OR OR; no additional data + ECL_XOR XOR; no additional data + ECL_NOT NOT; no additional data + ECL_XCLASS The additional data which follows ECL_XCLASS is the same as for + an OP_XCLASS, except that this data is preceded by ECL_XCLASS + rather than OP_XCLASS. + Because the OP_ECLASS has its own bitmap (if required), an + ECL_XCLASS should not contain a bitmap. + +Additionally, there are two intermediate values used during compilation, but +these are folded away during generation of the opcode, and so never appear +inside an OP_ECLASS at match time. They are: + + ECL_ANY match all characters; no additional data + ECL_NONE match no characters; no additional data + +The meaning of the bitmap indicated by ECL_MAP is the same as XCL_MAP. +If the bitmap is present, all codepoints < 256 are checked against the bitmap. + Back references --------------- @@ -631,6 +682,9 @@ generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index required name, followed by the number of groups with the same name. The matching code can then search for the first one that is set. +OP_REFI and OP_DNREFI are further followed by an item containing any +case-insensitivity flags. + Repeating character classes and back references ----------------------------------------------- @@ -750,6 +804,16 @@ In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are also the number of code units, but in UTF-8/16 mode each character may occupy more than one code unit. +The "scan substring" assertion compiles as OP_ASSERT_SCS. This opcode is +followed by a list of arguments. Each argument is either an OP_CREF or +OP_DNCREF byte code sequence. The details of these sequences are described +in the next section. + +For example (*scs:(1,'NAME')...PATTERN...) is translated to: +[OP_ASSERT_SCS] [OP_CREF] [OP_CREF] ...PATTERN... 
[OP_KET] + +If 'NAME' is a duplicated name, the second [OP_CREF] is [OP_DNCREF] instead. + Conditional subpatterns ----------------------- @@ -849,5 +913,12 @@ The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is not a real opcode, but is used to check at compile time that tables indexed by opcode are the correct length, in order to catch updating errors. + +See also +-------- + +The file maint/README contains additional information. + + Philip Hazel -November 2023 +August 2024 diff --git a/LICENCE b/LICENCE.md similarity index 55% rename from LICENCE rename to LICENCE.md index 3c1ef03..f58ceb7 100644 --- a/LICENCE +++ b/LICENCE.md @@ -1,5 +1,8 @@ -PCRE2 LICENCE -------------- +PCRE2 License +============= + +| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception | +|---------|-------| PCRE2 is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. @@ -16,40 +19,46 @@ optimize pattern matching. This is an optional feature that can be omitted when the library is built. -THE BASIC LIBRARY FUNCTIONS ---------------------------- +COPYRIGHT +--------- + +### The basic library functions -Written by: Philip Hazel -Email local part: Philip.Hazel -Email domain: gmail.com + Written by: Philip Hazel + Email local part: Philip.Hazel + Email domain: gmail.com -Retired from University of Cambridge Computing Service, -Cambridge, England. + Retired from University of Cambridge Computing Service, + Cambridge, England. -Copyright (c) 1997-2024 University of Cambridge -All rights reserved. + Copyright (c) 1997-2007 University of Cambridge + Copyright (c) 2007-2024 Philip Hazel + All rights reserved. 
+### PCRE2 Just-In-Time compilation support -PCRE2 JUST-IN-TIME COMPILATION SUPPORT --------------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu -Written by: Zoltan Herczeg -Email local part: hzmester -Email domain: freemail.hu + Copyright (c) 2010-2024 Zoltan Herczeg + All rights reserved. -Copyright(c) 2010-2024 Zoltan Herczeg -All rights reserved. +### Stack-less Just-In-Time compiler + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu -STACK-LESS JUST-IN-TIME COMPILER --------------------------------- + Copyright (c) 2009-2024 Zoltan Herczeg + All rights reserved. -Written by: Zoltan Herczeg -Email local part: hzmester -Email domain: freemail.hu +### All other contributions -Copyright(c) 2009-2024 Zoltan Herczeg -All rights reserved. +Many other contributors have participated in the authorship of PCRE2. As PCRE2 +has never required a Contributor Licensing Agreement, or other copyright +assignment agreement, all contributions have copyright retained by each +original contributor or their employer. THE "BSD" LICENCE @@ -58,16 +67,16 @@ THE "BSD" LICENCE Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notices, - this list of conditions and the following disclaimer. +* Redistributions of source code must retain the above copyright notices, + this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notices, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. +* Redistributions in binary form must reproduce the above copyright + notices, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
- * Neither the name of the University of Cambridge nor the names of any - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. +* Neither the name of the University of Cambridge nor the names of any + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..ee4dd36 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,9 @@ +module( + name = "pcre2", + version = "10.45-RC1", + compatibility_level = 1, +) + +bazel_dep(name = "rules_cc", version = "0.0.1") +bazel_dep(name = "bazel_skylib", version = "1.2.1") +bazel_dep(name = "platforms", version = "0.0.4") diff --git a/Makefile.am b/Makefile.am index ca6a638..8aaa0cc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -10,12 +10,13 @@ AM_CPPFLAGS="-I$(srcdir)/src" ## Specify the documentation files that are distributed. 
dist_doc_DATA = \ - AUTHORS \ + AUTHORS.md \ COPYING \ ChangeLog \ - LICENCE \ + LICENCE.md \ NEWS \ README \ + SECURITY.md \ doc/pcre2.txt \ doc/pcre2-config.txt \ doc/pcre2grep.txt \ @@ -86,11 +87,13 @@ dist_html_DATA = \ doc/html/pcre2_set_max_pattern_length.html \ doc/html/pcre2_set_max_varlookbehind.html \ doc/html/pcre2_set_offset_limit.html \ + doc/html/pcre2_set_optimize.html \ doc/html/pcre2_set_newline.html \ doc/html/pcre2_set_parens_nest_limit.html \ doc/html/pcre2_set_recursion_limit.html \ doc/html/pcre2_set_recursion_memory_management.html \ doc/html/pcre2_set_substitute_callout.html \ + doc/html/pcre2_set_substitute_case_callout.html \ doc/html/pcre2_substitute.html \ doc/html/pcre2_substring_copy_byname.html \ doc/html/pcre2_substring_copy_bynumber.html \ @@ -185,11 +188,13 @@ dist_man_MANS = \ doc/pcre2_set_max_pattern_length.3 \ doc/pcre2_set_max_varlookbehind.3 \ doc/pcre2_set_offset_limit.3 \ + doc/pcre2_set_optimize.3 \ doc/pcre2_set_newline.3 \ doc/pcre2_set_parens_nest_limit.3 \ doc/pcre2_set_recursion_limit.3 \ doc/pcre2_set_recursion_memory_management.3 \ doc/pcre2_set_substitute_callout.3 \ + doc/pcre2_set_substitute_case_callout.3 \ doc/pcre2_substitute.3 \ doc/pcre2_substring_copy_byname.3 \ doc/pcre2_substring_copy_bynumber.3 \ @@ -272,6 +277,14 @@ EXTRA_DIST += \ NON-AUTOTOOLS-BUILD \ HACKING +# These are support files for building with Bazel or Zig + +EXTRA_DIST += \ + BUILD.bazel \ + MODULE.bazel \ + WORKSPACE.bazel \ + build.zig + # These are support files for building under VMS EXTRA_DIST += \ @@ -280,16 +293,6 @@ EXTRA_DIST += \ vms/pcre2.h_patch \ vms/stdint.h -# These files are used in the preparation of a release - -EXTRA_DIST += \ - PrepareRelease \ - CheckMan \ - CleanTxt \ - Detrail \ - 132html \ - doc/index.html.src - # These files are usable versions of pcre2.h and config.h that are distributed # for the benefit of people who are building PCRE2 manually, without the # Autotools support. 
@@ -374,6 +377,8 @@ COMMON_SOURCES = \ src/pcre2_auto_possess.c \ src/pcre2_chkdint.c \ src/pcre2_compile.c \ + src/pcre2_compile.h \ + src/pcre2_compile_class.c \ src/pcre2_config.c \ src/pcre2_context.c \ src/pcre2_convert.c \ @@ -383,6 +388,7 @@ COMMON_SOURCES = \ src/pcre2_find_bracket.c \ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h \ src/pcre2_jit_compile.c \ src/pcre2_jit_neon_inc.h \ src/pcre2_jit_simd_inc.h \ @@ -401,6 +407,7 @@ COMMON_SOURCES = \ src/pcre2_tables.c \ src/pcre2_ucd.c \ src/pcre2_ucp.h \ + src/pcre2_util.h \ src/pcre2_valid_utf.c \ src/pcre2_xclass.c @@ -460,39 +467,39 @@ CLEANFILES += src/pcre2_chartables.c # when pcre2_jit_compile.c is processed, so they must be distributed. EXTRA_DIST += \ - src/sljit/sljitConfig.h \ - src/sljit/sljitConfigCPU.h \ - src/sljit/sljitConfigInternal.h \ - src/sljit/sljitLir.c \ - src/sljit/sljitLir.h \ - src/sljit/sljitNativeARM_32.c \ - src/sljit/sljitNativeARM_64.c \ - src/sljit/sljitNativeARM_T2_32.c \ - src/sljit/sljitNativeLOONGARCH_64.c \ - src/sljit/sljitNativeMIPS_32.c \ - src/sljit/sljitNativeMIPS_64.c \ - src/sljit/sljitNativeMIPS_common.c \ - src/sljit/sljitNativePPC_32.c \ - src/sljit/sljitNativePPC_64.c \ - src/sljit/sljitNativePPC_common.c \ - src/sljit/sljitNativeRISCV_32.c \ - src/sljit/sljitNativeRISCV_64.c \ - src/sljit/sljitNativeRISCV_common.c \ - src/sljit/sljitNativeS390X.c \ - src/sljit/sljitNativeX86_32.c \ - src/sljit/sljitNativeX86_64.c \ - src/sljit/sljitNativeX86_common.c \ - src/sljit/sljitSerialize.c \ - src/sljit/sljitUtils.c \ - src/sljit/allocator_src/sljitExecAllocatorApple.c \ - src/sljit/allocator_src/sljitExecAllocatorCore.c \ - src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c \ - src/sljit/allocator_src/sljitExecAllocatorPosix.c \ - src/sljit/allocator_src/sljitExecAllocatorWindows.c \ - src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c \ - src/sljit/allocator_src/sljitProtExecAllocatorPosix.c \ - 
src/sljit/allocator_src/sljitWXExecAllocatorPosix.c \ - src/sljit/allocator_src/sljitWXExecAllocatorWindows.c + deps/sljit/sljit_src/sljitConfig.h \ + deps/sljit/sljit_src/sljitConfigCPU.h \ + deps/sljit/sljit_src/sljitConfigInternal.h \ + deps/sljit/sljit_src/sljitLir.c \ + deps/sljit/sljit_src/sljitLir.h \ + deps/sljit/sljit_src/sljitNativeARM_32.c \ + deps/sljit/sljit_src/sljitNativeARM_64.c \ + deps/sljit/sljit_src/sljitNativeARM_T2_32.c \ + deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_32.c \ + deps/sljit/sljit_src/sljitNativeMIPS_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_common.c \ + deps/sljit/sljit_src/sljitNativePPC_32.c \ + deps/sljit/sljit_src/sljitNativePPC_64.c \ + deps/sljit/sljit_src/sljitNativePPC_common.c \ + deps/sljit/sljit_src/sljitNativeRISCV_32.c \ + deps/sljit/sljit_src/sljitNativeRISCV_64.c \ + deps/sljit/sljit_src/sljitNativeRISCV_common.c \ + deps/sljit/sljit_src/sljitNativeS390X.c \ + deps/sljit/sljit_src/sljitNativeX86_32.c \ + deps/sljit/sljit_src/sljitNativeX86_64.c \ + deps/sljit/sljit_src/sljitNativeX86_common.c \ + deps/sljit/sljit_src/sljitSerialize.c \ + deps/sljit/sljit_src/sljitUtils.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c \ + deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c # Some of the JIT sources are also in separate files that are #included. 
@@ -710,9 +717,12 @@ EXTRA_DIST += \ testdata/grepinput \ testdata/grepinput3 \ testdata/grepinput8 \ + testdata/grepinputBad8 \ + testdata/grepinputBad8_Trail \ testdata/grepinputC.bz2 \ testdata/grepinputC.gz \ testdata/grepinputM \ + testdata/grepinputUN \ testdata/grepinputv \ testdata/grepinputx \ testdata/greplist \ @@ -755,6 +765,7 @@ EXTRA_DIST += \ testdata/testinput24 \ testdata/testinput25 \ testdata/testinput26 \ + testdata/testinput27 \ testdata/testinputEBC \ testdata/testinputheap \ testdata/testoutput1 \ @@ -799,6 +810,7 @@ EXTRA_DIST += \ testdata/testoutput24 \ testdata/testoutput25 \ testdata/testoutput26 \ + testdata/testoutput27 \ testdata/testoutputEBC \ testdata/testoutputheap-16 \ testdata/testoutputheap-32 \ @@ -819,7 +831,7 @@ CLEANFILES += \ test3outputB \ testtry \ teststdout \ - teststderr \ + teststderr \ teststderrgrep \ testtemp1grep \ testtemp2grep \ @@ -957,7 +969,6 @@ endif # WITH_GCOV EXTRA_DIST += \ cmake/COPYING-CMAKE-SCRIPTS \ cmake/FindEditline.cmake \ - cmake/FindPackageHandleStandardArgs.cmake \ cmake/FindReadline.cmake \ cmake/pcre2-config-version.cmake.in \ cmake/pcre2-config.cmake.in \ diff --git a/Makefile.in b/Makefile.in index 5fb3982..170df83 100644 --- a/Makefile.in +++ b/Makefile.in @@ -238,22 +238,24 @@ _libs_libpcre2_fuzzsupport_a_OBJECTS = \ $(am__libs_libpcre2_fuzzsupport_a_OBJECTS) libpcre2_16_la_DEPENDENCIES = am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \ - src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ src/pcre2_internal.h src/pcre2_intmodedep.h \ - src/pcre2_jit_compile.c src/pcre2_jit_neon_inc.h \ - src/pcre2_jit_simd_inc.h src/pcre2_maketables.c \ - src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ - src/pcre2_ord2utf.c 
src/pcre2_pattern_info.c \ - src/pcre2_script_run.c src/pcre2_serialize.c \ - src/pcre2_string_utils.c src/pcre2_study.c \ - src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ - src/pcre2_valid_utf.c src/pcre2_xclass.c + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \ src/libpcre2_16_la-pcre2_chkdint.lo \ src/libpcre2_16_la-pcre2_compile.lo \ + src/libpcre2_16_la-pcre2_compile_class.lo \ src/libpcre2_16_la-pcre2_config.lo \ src/libpcre2_16_la-pcre2_context.lo \ src/libpcre2_16_la-pcre2_convert.lo \ @@ -294,22 +296,24 @@ libpcre2_16_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ @WITH_PCRE2_16_TRUE@am_libpcre2_16_la_rpath = -rpath $(libdir) libpcre2_32_la_DEPENDENCIES = am__libpcre2_32_la_SOURCES_DIST = src/pcre2_auto_possess.c \ - src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ src/pcre2_internal.h src/pcre2_intmodedep.h \ - src/pcre2_jit_compile.c src/pcre2_jit_neon_inc.h \ - src/pcre2_jit_simd_inc.h src/pcre2_maketables.c \ - src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ - src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ - src/pcre2_script_run.c src/pcre2_serialize.c \ - src/pcre2_string_utils.c src/pcre2_study.c \ - src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_jit_char_inc.h 
src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ - src/pcre2_valid_utf.c src/pcre2_xclass.c + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \ src/libpcre2_32_la-pcre2_chkdint.lo \ src/libpcre2_32_la-pcre2_compile.lo \ + src/libpcre2_32_la-pcre2_compile_class.lo \ src/libpcre2_32_la-pcre2_config.lo \ src/libpcre2_32_la-pcre2_context.lo \ src/libpcre2_32_la-pcre2_convert.lo \ @@ -346,22 +350,24 @@ libpcre2_32_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ @WITH_PCRE2_32_TRUE@am_libpcre2_32_la_rpath = -rpath $(libdir) libpcre2_8_la_DEPENDENCIES = am__libpcre2_8_la_SOURCES_DIST = src/pcre2_auto_possess.c \ - src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ src/pcre2_internal.h src/pcre2_intmodedep.h \ - src/pcre2_jit_compile.c src/pcre2_jit_neon_inc.h \ - src/pcre2_jit_simd_inc.h src/pcre2_maketables.c \ - src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ - src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ - src/pcre2_script_run.c src/pcre2_serialize.c \ - src/pcre2_string_utils.c src/pcre2_study.c \ - src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + 
src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ - src/pcre2_valid_utf.c src/pcre2_xclass.c + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \ src/libpcre2_8_la-pcre2_chkdint.lo \ src/libpcre2_8_la-pcre2_compile.lo \ + src/libpcre2_8_la-pcre2_compile_class.lo \ src/libpcre2_8_la-pcre2_config.lo \ src/libpcre2_8_la-pcre2_context.lo \ src/libpcre2_8_la-pcre2_convert.lo \ @@ -510,6 +516,7 @@ am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsu src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo \ src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo \ @@ -538,6 +545,7 @@ am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsu src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo \ src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo \ @@ -566,6 +574,7 @@ am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsu src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo \ src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo \ @@ -853,9 +862,9 @@ 
am__DIST_COMMON = $(dist_man_MANS) $(srcdir)/Makefile.in \ $(srcdir)/libpcre2-16.pc.in $(srcdir)/libpcre2-32.pc.in \ $(srcdir)/libpcre2-8.pc.in $(srcdir)/libpcre2-posix.pc.in \ $(srcdir)/pcre2-config.in $(top_srcdir)/src/config.h.in \ - $(top_srcdir)/src/pcre2.h.in AUTHORS COPYING ChangeLog INSTALL \ - NEWS README ar-lib compile config.guess config.sub depcomp \ - install-sh ltmain.sh missing test-driver + $(top_srcdir)/src/pcre2.h.in AUTHORS.md COPYING ChangeLog \ + INSTALL NEWS README ar-lib compile config.guess config.sub \ + depcomp install-sh ltmain.sh missing test-driver DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) @@ -974,7 +983,6 @@ VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ -VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -1036,12 +1044,13 @@ AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 AM_CPPFLAGS = "-I$(srcdir)/src" dist_doc_DATA = \ - AUTHORS \ + AUTHORS.md \ COPYING \ ChangeLog \ - LICENCE \ + LICENCE.md \ NEWS \ README \ + SECURITY.md \ doc/pcre2.txt \ doc/pcre2-config.txt \ doc/pcre2grep.txt \ @@ -1112,11 +1121,13 @@ dist_html_DATA = \ doc/html/pcre2_set_max_pattern_length.html \ doc/html/pcre2_set_max_varlookbehind.html \ doc/html/pcre2_set_offset_limit.html \ + doc/html/pcre2_set_optimize.html \ doc/html/pcre2_set_newline.html \ doc/html/pcre2_set_parens_nest_limit.html \ doc/html/pcre2_set_recursion_limit.html \ doc/html/pcre2_set_recursion_memory_management.html \ doc/html/pcre2_set_substitute_callout.html \ + doc/html/pcre2_set_substitute_case_callout.html \ doc/html/pcre2_substitute.html \ doc/html/pcre2_substring_copy_byname.html \ doc/html/pcre2_substring_copy_bynumber.html \ @@ -1211,11 +1222,13 @@ dist_man_MANS = \ doc/pcre2_set_max_pattern_length.3 \ 
doc/pcre2_set_max_varlookbehind.3 \ doc/pcre2_set_offset_limit.3 \ + doc/pcre2_set_optimize.3 \ doc/pcre2_set_newline.3 \ doc/pcre2_set_parens_nest_limit.3 \ doc/pcre2_set_recursion_limit.3 \ doc/pcre2_set_recursion_memory_management.3 \ doc/pcre2_set_substitute_callout.3 \ + doc/pcre2_set_substitute_case_callout.3 \ doc/pcre2_substitute.3 \ doc/pcre2_substring_copy_byname.3 \ doc/pcre2_substring_copy_bynumber.3 \ @@ -1279,9 +1292,9 @@ MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic # These files contain maintenance information -# These are support files for building under VMS +# These are support files for building with Bazel or Zig -# These files are used in the preparation of a release +# These are support files for building under VMS # These files are usable versions of pcre2.h and config.h that are distributed # for the benefit of people who are building PCRE2 manually, without the @@ -1300,42 +1313,50 @@ MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic # PCRE2 demonstration program. Not built automatically. The point is that the # users should build it themselves. So just distribute the source. 
EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ - NON-AUTOTOOLS-BUILD HACKING vms/configure.com \ + NON-AUTOTOOLS-BUILD HACKING BUILD.bazel MODULE.bazel \ + WORKSPACE.bazel build.zig vms/configure.com \ vms/openvms_readme.txt vms/pcre2.h_patch vms/stdint.h \ - PrepareRelease CheckMan CleanTxt Detrail 132html \ - doc/index.html.src src/pcre2.h.generic src/config.h.generic \ - src/pcre2_ucptables.c src/pcre2_chartables.c.dist \ - src/sljit/sljitConfig.h src/sljit/sljitConfigCPU.h \ - src/sljit/sljitConfigInternal.h src/sljit/sljitLir.c \ - src/sljit/sljitLir.h src/sljit/sljitNativeARM_32.c \ - src/sljit/sljitNativeARM_64.c src/sljit/sljitNativeARM_T2_32.c \ - src/sljit/sljitNativeLOONGARCH_64.c \ - src/sljit/sljitNativeMIPS_32.c src/sljit/sljitNativeMIPS_64.c \ - src/sljit/sljitNativeMIPS_common.c \ - src/sljit/sljitNativePPC_32.c src/sljit/sljitNativePPC_64.c \ - src/sljit/sljitNativePPC_common.c \ - src/sljit/sljitNativeRISCV_32.c \ - src/sljit/sljitNativeRISCV_64.c \ - src/sljit/sljitNativeRISCV_common.c \ - src/sljit/sljitNativeS390X.c src/sljit/sljitNativeX86_32.c \ - src/sljit/sljitNativeX86_64.c \ - src/sljit/sljitNativeX86_common.c src/sljit/sljitSerialize.c \ - src/sljit/sljitUtils.c \ - src/sljit/allocator_src/sljitExecAllocatorApple.c \ - src/sljit/allocator_src/sljitExecAllocatorCore.c \ - src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c \ - src/sljit/allocator_src/sljitExecAllocatorPosix.c \ - src/sljit/allocator_src/sljitExecAllocatorWindows.c \ - src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c \ - src/sljit/allocator_src/sljitProtExecAllocatorPosix.c \ - src/sljit/allocator_src/sljitWXExecAllocatorPosix.c \ - src/sljit/allocator_src/sljitWXExecAllocatorWindows.c \ + src/pcre2.h.generic src/config.h.generic src/pcre2_ucptables.c \ + src/pcre2_chartables.c.dist deps/sljit/sljit_src/sljitConfig.h \ + deps/sljit/sljit_src/sljitConfigCPU.h \ + deps/sljit/sljit_src/sljitConfigInternal.h \ + deps/sljit/sljit_src/sljitLir.c \ + 
deps/sljit/sljit_src/sljitLir.h \ + deps/sljit/sljit_src/sljitNativeARM_32.c \ + deps/sljit/sljit_src/sljitNativeARM_64.c \ + deps/sljit/sljit_src/sljitNativeARM_T2_32.c \ + deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_32.c \ + deps/sljit/sljit_src/sljitNativeMIPS_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_common.c \ + deps/sljit/sljit_src/sljitNativePPC_32.c \ + deps/sljit/sljit_src/sljitNativePPC_64.c \ + deps/sljit/sljit_src/sljitNativePPC_common.c \ + deps/sljit/sljit_src/sljitNativeRISCV_32.c \ + deps/sljit/sljit_src/sljitNativeRISCV_64.c \ + deps/sljit/sljit_src/sljitNativeRISCV_common.c \ + deps/sljit/sljit_src/sljitNativeS390X.c \ + deps/sljit/sljit_src/sljitNativeX86_32.c \ + deps/sljit/sljit_src/sljitNativeX86_64.c \ + deps/sljit/sljit_src/sljitNativeX86_common.c \ + deps/sljit/sljit_src/sljitSerialize.c \ + deps/sljit/sljit_src/sljitUtils.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c \ + deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c \ src/pcre2_jit_match.c src/pcre2_jit_misc.c \ src/pcre2_printint.c RunTest.bat $(am__append_44) \ testdata/grepbinary testdata/grepfilelist testdata/grepinput \ - testdata/grepinput3 testdata/grepinput8 \ - testdata/grepinputC.bz2 testdata/grepinputC.gz \ - testdata/grepinputM testdata/grepinputv testdata/grepinputx \ + testdata/grepinput3 testdata/grepinput8 testdata/grepinputBad8 \ + testdata/grepinputBad8_Trail testdata/grepinputC.bz2 \ + testdata/grepinputC.gz 
testdata/grepinputM \ + testdata/grepinputUN testdata/grepinputv testdata/grepinputx \ testdata/greplist testdata/grepnot.bz2 testdata/grepoutput \ testdata/grepoutput8 testdata/grepoutputC \ testdata/grepoutputCN testdata/grepoutputCNU \ @@ -1350,7 +1371,7 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ testdata/testinput16 testdata/testinput17 testdata/testinput18 \ testdata/testinput19 testdata/testinput20 testdata/testinput21 \ testdata/testinput22 testdata/testinput23 testdata/testinput24 \ - testdata/testinput25 testdata/testinput26 \ + testdata/testinput25 testdata/testinput26 testdata/testinput27 \ testdata/testinputEBC testdata/testinputheap \ testdata/testoutput1 testdata/testoutput2 testdata/testoutput3 \ testdata/testoutput3A testdata/testoutput3B \ @@ -1372,13 +1393,13 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ testdata/testoutput22-32 testdata/testoutput22-8 \ testdata/testoutput23 testdata/testoutput24 \ testdata/testoutput25 testdata/testoutput26 \ - testdata/testoutputEBC testdata/testoutputheap-16 \ - testdata/testoutputheap-32 testdata/testoutputheap-8 \ - testdata/valgrind-jit.supp testdata/wintestinput3 \ - testdata/wintestoutput3 perltest.sh src/pcre2demo.c \ - cmake/COPYING-CMAKE-SCRIPTS cmake/FindEditline.cmake \ - cmake/FindPackageHandleStandardArgs.cmake \ - cmake/FindReadline.cmake cmake/pcre2-config-version.cmake.in \ + testdata/testoutput27 testdata/testoutputEBC \ + testdata/testoutputheap-16 testdata/testoutputheap-32 \ + testdata/testoutputheap-8 testdata/valgrind-jit.supp \ + testdata/wintestinput3 testdata/wintestoutput3 perltest.sh \ + src/pcre2demo.c cmake/COPYING-CMAKE-SCRIPTS \ + cmake/FindEditline.cmake cmake/FindReadline.cmake \ + cmake/pcre2-config-version.cmake.in \ cmake/pcre2-config.cmake.in CMakeLists.txt config-cmake.h.in # These are the header files we'll install. 
We do not distribute pcre2.h @@ -1395,6 +1416,8 @@ COMMON_SOURCES = \ src/pcre2_auto_possess.c \ src/pcre2_chkdint.c \ src/pcre2_compile.c \ + src/pcre2_compile.h \ + src/pcre2_compile_class.c \ src/pcre2_config.c \ src/pcre2_context.c \ src/pcre2_convert.c \ @@ -1404,6 +1427,7 @@ COMMON_SOURCES = \ src/pcre2_find_bracket.c \ src/pcre2_internal.h \ src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h \ src/pcre2_jit_compile.c \ src/pcre2_jit_neon_inc.h \ src/pcre2_jit_simd_inc.h \ @@ -1422,6 +1446,7 @@ COMMON_SOURCES = \ src/pcre2_tables.c \ src/pcre2_ucd.c \ src/pcre2_ucp.h \ + src/pcre2_util.h \ src/pcre2_valid_utf.c \ src/pcre2_xclass.c @@ -1562,15 +1587,15 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ - echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ - $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ && exit 0; \ exit 1;; \ esac; \ done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --gnu Makefile + $(AUTOMAKE) --foreign Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ @@ -1748,6 +1773,8 @@ src/libpcre2_16_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_config.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_16_la-pcre2_context.lo: src/$(am__dirstamp) \ @@ -1807,6 +1834,8 @@ src/libpcre2_32_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_config.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_32_la-pcre2_context.lo: src/$(am__dirstamp) \ @@ -1866,6 +1895,8 @@ src/libpcre2_8_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_compile.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_config.lo: src/$(am__dirstamp) \ src/$(DEPDIR)/$(am__dirstamp) src/libpcre2_8_la-pcre2_context.lo: src/$(am__dirstamp) \ @@ -2023,6 +2054,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo@am__quote@ # am--include-marker 
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo@am__quote@ # am--include-marker @@ -2051,6 +2083,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo@am__quote@ # am--include-marker @@ -2079,6 +2112,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo@am__quote@ # am--include-marker @@ -2206,6 +2240,13 @@ 
src/libpcre2_16_la-pcre2_compile.lo: src/pcre2_compile.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +src/libpcre2_16_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Tpo -c -o src/libpcre2_16_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_16_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + src/libpcre2_16_la-pcre2_config.lo: src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo -c -o src/libpcre2_16_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo @@ -2402,6 +2443,13 @@ src/libpcre2_32_la-pcre2_compile.lo: src/pcre2_compile.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +src/libpcre2_32_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Tpo -c -o src/libpcre2_32_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_32_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + src/libpcre2_32_la-pcre2_config.lo: src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo -c -o src/libpcre2_32_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo @@ -2598,6 +2646,13 @@ src/libpcre2_8_la-pcre2_compile.lo: src/pcre2_compile.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +src/libpcre2_8_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Tpo -c -o src/libpcre2_8_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Tpo 
src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_8_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + src/libpcre2_8_la-pcre2_config.lo: src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo -c -o src/libpcre2_8_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo @@ -3562,6 +3617,7 @@ distclean: distclean-am -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo @@ -3590,6 +3646,7 @@ distclean: distclean-am -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo -rm -f 
src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo @@ -3618,6 +3675,7 @@ distclean: distclean-am -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo @@ -3708,6 +3766,7 @@ maintainer-clean: maintainer-clean-am -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo @@ -3736,6 +3795,7 @@ maintainer-clean: maintainer-clean-am -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo @@ -3764,6 +3824,7 @@ maintainer-clean: maintainer-clean-am -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo diff --git a/NEWS b/NEWS index 5f8dde3..6488d74 100644 --- a/NEWS +++ b/NEWS @@ 
-1,6 +1,92 @@ News about PCRE2 releases ------------------------- +Version 10.45-RC1 27-December-2024 +---------------------------------- + +This is a comparatively large release, incorporating new features, some +bugfixes, and a few changes with slight backwards compatibility implications. +Please see the ChangeLog and Git log for further details. + +Only changes to behaviour, changes to the API, and major changes to the pattern +syntax are described here. + +This release is the first to be available as a (signed) Git tag, or +alternatively as a (signed) tarball of the Git tag. + +This is also the first release to be made by the new maintainers of PCRE2, and +we would like to thank Philip Hazel, creator and maintainer of PCRE and PCRE2. + +* (Git change) The sljit project has been split out into a separate Git + repository. Git users must now run `git submodule init; git submodule update` + after a Git checkout. + +* (Behaviour change) Update Unicode support to UCD 16. + +* (Match behaviour change) Case-insensitive matching of Unicode properties + Ll, Lt, and Lu has been changed to match Perl. Previously, /\p{Ll}/i would + match only lower-case characters (even though case-insensitive matching was + specified). This also affects case-insensitive matching of POSIX classes such + as [:lower:]. + +* (Minor match behaviour change) Case-insensitive matching of backreferences now + respects the PCRE2_EXTRA_CASELESS_RESTRICT option. + +* (Minor pattern syntax change) Parsing of the \x escape is stricter, and is + no longer parsed as an escape for the NUL character if not followed by '{' or + a hexadecimal digit. Use \x00 instead. + +* (Major new feature) Add a new feature called scan substring. This is a new + type of assertion which matches the content of a capturing block to a + sub-pattern. 
+ + Example: to find a word that contains the rare (in English) sequence of + letters "rh" not at the start: + + \b(\w++)(*scan_substring:(1).+rh) + + The first group captures a word which is then scanned by the + (*scan_substring:(1) ... ) assertion, which tests whether the pattern ".+rh" + matches the capture group "(1)". + +* (Major new feature) Add support for UTS#18 compatible character classes, + using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a + metacharacter within character classes and the operators '&&', '--' and '~~', + allowing subtractions and intersections of character classes to be easily + expressed. + + Example: to match Thai or Greek letters (but not letters or other characters + in those scripts), use [\p{L}&&[\p{Thai}||\p{Greek}]]. + +* (Major new feature) Add support for Perl-style extended character classes, + using the syntax (?[...]). This also allows expressing subtractions and + intersections of character classes, but using a different syntax to UTS#18. + + Example: to match Thai or Greek letters (but not letters or other characters + in those scripts), use (?[\p{L} & (\p{Thai} + \p{Greek})]). + +* (Minor feature) Significant improvements to the character class match engine. + Compiled character classes are now more compact, and have faster matching + for large or complex character sets, using binary search through the set. + +* JIT compilation now fails with the new error code PCRE2_ERROR_JIT_UNSUPPORTED + for patterns which use features not supported by the JIT compiler. + +* (Minor feature) New options PCRE2_EXTRA_NO_BS0 (disallow \0 as an escape for + the NUL character); PCRE2_EXTRA_PYTHON_OCTAL (use Python disambiguation rules + for deciding whether \12 is a backreference or an octal escape); + PCRE2_EXTRA_NEVER_CALLOUT (disable callout syntax entirely); + PCRE2_EXTRA_TURKISH_CASING (use Turkish rules for case-insensitive matching). 
+ +* (Minor feature) Add new API function pcre2_set_optimize() for controlling + which optimizations are enabled. + +* (Minor new features) A variety of extensions have been made to + pcre2_substitute() and its syntax for replacement strings. These now support: + \123 octal escapes; titlecasing \u\L; \1 backreferences; \g<1> and $ + backreferences; $& $` $' and $_; new function + pcre2_set_substitute_case_callout() to allow locale-aware case transformation. + Version 10.44 07-June-2024 -------------------------- @@ -13,7 +99,7 @@ increased to 128. Some auxiliary files for building under VMS are added. Version 10.43 16-February-2024 ------------------------------ -There are quite a lot of changes in this release (see ChangeLog and git log for +There are quite a lot of changes in this release (see ChangeLog and Git log for a list). Those that are not bugfixes or code tidies are: * The JIT code no longer supports ARMv5 architecture. @@ -52,7 +138,7 @@ a list). Those that are not bugfixes or code tidies are: matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can be used to keep it ASCII only. -* Make PCRE2_UCP the default in UTF mode in pcre2grep and add -no_ucp, +* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp, --case-restrict and --posix-digit. * Add --group-separator and --no-group-separator to pcre2grep. diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD index 851976a..bb687f7 100644 --- a/NON-AUTOTOOLS-BUILD +++ b/NON-AUTOTOOLS-BUILD @@ -105,6 +105,7 @@ example. pcre2_chkdint.c pcre2_chartables.c pcre2_compile.c + pcre2_compile_class.c pcre2_config.c pcre2_context.c pcre2_convert.c @@ -138,7 +139,7 @@ example. Note that you must compile pcre2_jit_compile.c, even if you have not defined SUPPORT_JIT in src/config.h, because when JIT support is not configured, dummy functions are compiled. 
When JIT support IS configured, - pcre2_jit_compile.c #includes other files from the sljit subdirectory, + pcre2_jit_compile.c #includes other files from the sljit dependency, all of whose names begin with "sljit". It also #includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile those yourself. @@ -301,56 +302,66 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no spaces in the names for your CMake installation and your PCRE2 source and build directories. -The following instructions were contributed by a PCRE1 user, but they should -also work for PCRE2. If they are not followed exactly, errors may occur. In the -event that errors do occur, it is recommended that you delete the CMake cache -before attempting to repeat the CMake build process. In the CMake GUI, the -cache can be deleted by selecting "File > Delete Cache". +If you are using CMake and encounter errors, deleting the CMake cache and +restarting from a fresh build may fix the error. In the CMake GUI, the cache can +be deleted by selecting "File > Delete Cache"; or the folder "CMakeCache" can +be deleted. -1. Install the latest CMake version available from http://www.cmake.org/, and - ensure that cmake\bin is on your path. +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. -2. Unzip (retaining folder structure) the PCRE2 source tree into a source - directory such as C:\pcre2. You should ensure your local date and time - is not earlier than the file dates in your source dir if the release is - very new. +2. Unzip (retaining folder structure) the PCRE2 source tree into a source + directory such as C:\pcre2. You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. -3. Create a new, empty build directory, preferably a subdirectory of the - source dir. For example, C:\pcre2\pcre2-xx\build. +3. 
Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre2\pcre2-xx\build. -4. Run cmake-gui from the Shell environment of your build tool, for example, - Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try - to start Cmake from the Windows Start menu, as this can lead to errors. +4. Run CMake. -5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and - build directories, respectively. + - Using the CLI, simply run `cmake ..` inside the `build/` directory. You can + use the `ccmake` ncurses GUI to select and configure PCRE2 features. -6. Hit the "Configure" button. + - Using the CMake GUI: -7. Select the particular IDE / build tool that you are using (Visual - Studio, MSYS makefiles, MinGW makefiles, etc.) + a) Run cmake-gui from the Shell environment of your build tool, for + example, Msys for Msys/MinGW or Visual Studio Command Prompt for + VC/VC++. -8. The GUI will then list several configuration options. This is where - you can disable Unicode support or select other PCRE2 optional features. + b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and + build directories, respectively. -9. Hit "Configure" again. The adjacent "Generate" button should now be - active. + c) Press the "Configure" button. -10. Hit "Generate". + d) Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) -11. The build directory should now contain a usable build system, be it a - solution file for Visual Studio, makefiles for MinGW, etc. Exit from - cmake-gui and use the generated build system with your compiler or IDE. - E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 - solution, select the desired configuration (Debug, or Release, etc.) and - build the ALL_BUILD project. + e) The GUI will then list several configuration options. 
This is where + you can disable Unicode support or select other PCRE2 optional features. -12. If during configuration with cmake-gui you've elected to build the test - programs, you can execute them by building the test project. E.g., for - MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The - most recent build configuration is targeted by the tests. A summary of - test results is presented. Complete test output is subsequently - available for review in Testing\Temporary under your build dir. + f) Press "Configure" again. The adjacent "Generate" button should now be + active. + + g) Press "Generate". + +5. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + + Regardless of build system used, `cmake --build .` will build it. + +6. If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + Regardless of build system used, `ctest` will run the tests. BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO @@ -425,6 +436,7 @@ OpenVMS. They are in the "vms" directory in the distribution tarball. Please read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep programs contain some VMS-specific code. 
-=========================== -Last Updated: 16 April 2024 -=========================== +============================== +Last updated: 26 December 2024 +============================== + diff --git a/PrepareRelease b/PrepareRelease deleted file mode 100755 index 0dc6e30..0000000 --- a/PrepareRelease +++ /dev/null @@ -1,257 +0,0 @@ -#/bin/sh - -# Script to prepare the files for building a PCRE2 release. It does some -# processing of the documentation, detrails files, and creates pcre2.h.generic -# and config.h.generic (for use by builders who can't run ./configure). - -# You must run this script before runnning "make dist". If its first argument -# is "doc", it stops after preparing the documentation. There are no other -# arguments. The script makes use of the following files: - -# 132html A Perl script that converts a .1 or .3 man page into HTML. It -# "knows" the relevant troff constructs that are used in the PCRE2 -# man pages. - -# CheckMan A Perl script that checks man pages for typos in the mark up. - -# CleanTxt A Perl script that cleans up the output of "nroff -man" by -# removing backspaces and other redundant text so as to produce -# a readable .txt file. - -# Detrail A Perl script that removes trailing spaces from files. - -# doc/index.html.src -# A file that is copied as index.html into the doc/html directory -# when the HTML documentation is built. It works like this so that -# doc/html can be deleted and re-created from scratch. - -# README & NON-AUTOTOOLS-BUILD -# These files are copied into the doc/html directory, with .txt -# extensions so that they can by hyperlinked from the HTML -# documentation, because some people just go to the HTML without -# looking for text files. - - -# First, sort out the documentation. Remove pcre2demo.3 first because it won't -# pass the markup check (it is created below, using markup that none of the -# other pages use). 
- -cd doc -echo Processing documentation - -/bin/rm -f pcre2demo.3 - -# Check the remaining man pages - -perl ../CheckMan *.1 *.3 -if [ $? != 0 ] ; then exit 1; fi - -# Make Text form of the documentation. It needs some mangling to make it -# tidy for online reading. Concatenate all the .3 stuff, but omit the -# individual function pages. - -cat <pcre2.txt ------------------------------------------------------------------------------ -This file contains a concatenation of the PCRE2 man pages, converted to plain -text format for ease of searching with a text editor, or for use on systems -that do not have a man page processor. The small individual files that give -synopses of each function in the library have not been included. Neither has -the pcre2demo program. There are separate text files for the pcre2grep and -pcre2test commands. ------------------------------------------------------------------------------ - - -End - -echo "Making pcre2.txt" -for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \ - pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \ - pcre2posix pcre2sample pcre2serialize pcre2syntax \ - pcre2unicode ; do - echo " Processing $file.3" - nroff -c -man $file.3 >$file.rawtxt - perl ../CleanTxt <$file.rawtxt >>pcre2.txt - /bin/rm $file.rawtxt - echo "------------------------------------------------------------------------------" >>pcre2.txt - if [ "$file" != "pcre2sample" ] ; then - echo " " >>pcre2.txt - echo " " >>pcre2.txt - fi -done - -# The three commands -for file in pcre2test pcre2grep pcre2-config ; do - echo Making $file.txt - nroff -c -man $file.1 >$file.rawtxt - perl ../CleanTxt <$file.rawtxt >$file.txt - /bin/rm $file.rawtxt -done - - -# Make pcre2demo.3 from the pcre2demo.c source file - -echo "Making pcre2demo.3" -perl <<"END" >pcre2demo.3 - use Time::Piece; - open(VH, "<", "../src/config.h.generic") || die "Failed to open src/config.h.generic\n"; - open(IN, "../src/pcre2demo.c") || die "Failed to 
open src/pcre2demo.c\n"; - open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n"; - my $version; - while () - { - chomp; - if ( /^#define PACKAGE_STRING "([^"]+)"/ ) { $version = $1 ; last } - } - my $t = localtime; - print OUT ".TH PCRE2DEMO 3 \"", $t->strftime('%e %B %Y'), '" "', $version, "\"\n" . - ".\\\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT!\n" . - ".SH NAME\n" . - "PCRE2DEMO - A demonstration C program for PCRE2\n" . - ".SH \"SOURCE CODE\"\n" . - ".rs\n" . - ".sp\n" . - ".\\\" Start example.\n" . - ".de EX\n" . - ". do ds mF \\\\n[.fam]\n" . - ". nr mE \\\\n(.f\n" . - ". nf\n" . - ". nh\n" . - ". do fam C\n" . - ". ft CW\n" . - "..\n" . - ".\n" . - ".\n" . - ".\\\" End example.\n" . - ".de EE\n" . - ". do fam \\\\*(mF\n" . - ". ft \\\\n(mE\n" . - ". fi\n" . - ". hy \\\\n(HY\n" . - "..\n" . - ".\n" . - ".RS -7\n" . - ".EX\n" ; - while () - { - s/\\/\\e/g; - print OUT; - } - print OUT ".EE\n"; - close(IN); - close(OUT); -END -if [ $? != 0 ] ; then exit 1; fi - - -# Make HTML form of the documentation. - -echo "Making HTML documentation" -/bin/rm html/* -cp index.html.src html/index.html -cp ../README html/README.txt -cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt - -for file in *.1 ; do - base=`basename $file .1` - echo " Making $base.html" - perl ../132html -toc $base <$file >html/$base.html -done - -# Exclude table of contents for function summaries. It seems that expr -# forces an anchored regex. Also exclude them for small pages that have -# only one section. - -for file in *.3 ; do - base=`basename $file .3` - toc=-toc - if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi - if [ "$base" = "pcre2sample" ] || \ - [ "$base" = "pcre2compat" ] || \ - [ "$base" = "pcre2demo" ] || \ - [ "$base" = "pcre2limits" ] || \ - [ "$base" = "pcre2unicode" ] ; then - toc="" - fi - echo " Making $base.html" - perl ../132html $toc $base <$file >html/$base.html - if [ $? 
!= 0 ] ; then exit 1; fi -done - -# End of documentation processing; stop if only documentation required. - -cd .. -echo Documentation done -if [ "$1" = "doc" ] ; then exit; fi - -# These files are detrailed; do not detrail the test data because there may be -# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF -# line endings and the detrail script removes all trailing white space. The -# configure files are also omitted from the detrailing. - -files="\ - Makefile.am \ - configure.ac \ - README \ - LICENCE \ - COPYING \ - AUTHORS \ - NEWS \ - NON-AUTOTOOLS-BUILD \ - INSTALL \ - 132html \ - CleanTxt \ - Detrail \ - ChangeLog \ - CMakeLists.txt \ - RunGrepTest \ - RunTest \ - pcre2-config.in \ - perltest.sh \ - libpcre2-8.pc.in \ - libpcre2-16.pc.in \ - libpcre2-32.pc.in \ - libpcre2-posix.pc.in \ - src/pcre2_dftables.c \ - src/pcre2.h.in \ - src/pcre2_auto_possess.c \ - src/pcre2_compile.c \ - src/pcre2_config.c \ - src/pcre2_context.c \ - src/pcre2_convert.c \ - src/pcre2_dfa_match.c \ - src/pcre2_error.c \ - src/pcre2_extuni.c \ - src/pcre2_find_bracket.c \ - src/pcre2_internal.h \ - src/pcre2_intmodedep.h \ - src/pcre2_jit_compile.c \ - src/pcre2_jit_match.c \ - src/pcre2_jit_misc.c \ - src/pcre2_jit_test.c \ - src/pcre2_maketables.c \ - src/pcre2_match.c \ - src/pcre2_match_data.c \ - src/pcre2_newline.c \ - src/pcre2_ord2utf.c \ - src/pcre2_pattern_info.c \ - src/pcre2_printint.c \ - src/pcre2_string_utils.c \ - src/pcre2_study.c \ - src/pcre2_substring.c \ - src/pcre2_tables.c \ - src/pcre2_ucd.c \ - src/pcre2_ucp.h \ - src/pcre2_valid_utf.c \ - src/pcre2_xclass.c \ - src/pcre2demo.c \ - src/pcre2grep.c \ - src/pcre2posix.c \ - src/pcre2posix.h \ - src/pcre2test.c" - -echo Detrailing -perl ./Detrail $files doc/p* doc/html/* - -echo Done - -#End diff --git a/README b/README index dab5e94..5a50f7f 100644 --- a/README +++ b/README @@ -385,7 +385,7 @@ library. They are also documented in the pcre2build man page. 
If this is done, when pcre2test's input is from a terminal, it reads it using the readline() function. This provides line-editing and history facilities. - Note that libreadline is GPL-licenced, so if you distribute a binary of + Note that libreadline is GPL-licensed, so if you distribute a binary of pcre2test linked in this way, there may be licensing issues. These can be avoided by linking with libedit (which has a BSD licence) instead. @@ -411,20 +411,19 @@ library. They are also documented in the pcre2build man page. Instead of %td or %zu, %lu is used, with a cast for size_t values. . There is a special option called --enable-fuzz-support for use by people who - want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit - library. If set, it causes an extra library called libpcre2-fuzzsupport.a to - be built, but not installed. This contains a single function called - LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the - length of the string. When called, this function tries to compile the string - as a pattern, and if that succeeds, to match it. This is done both with no - options and with some random options bits that are generated from the string. - Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to - be created. This is normally run under valgrind or used when PCRE2 is - compiled with address sanitizing enabled. It calls the fuzzing function and - outputs information about what it is doing. The input strings are specified - by arguments: if an argument starts with "=" the rest of it is a literal - input string. Otherwise, it is assumed to be a file name, and the contents - of the file are the test string. + want to run fuzzing tests on PCRE2. If set, it causes an extra library + called libpcre2-fuzzsupport.a to be built, but not installed. This contains + a single function called LLVMFuzzerTestOneInput() whose arguments are a + pointer to a string and the length of the string. 
When called, this function + tries to compile the string as a pattern, and if that succeeds, to match + it. This is done both with no options and with some random options bits that + are generated from the string. Setting --enable-fuzz-support also causes an + executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally + run under valgrind or used when PCRE2 is compiled with address sanitizing + enabled. It calls the fuzzing function and outputs information about what it + is doing. The input strings are specified by arguments: if an argument + starts with "=" the rest of it is a literal input string. Otherwise, it is + assumed to be a file name, and the contents of the file are the test string. . Releases before 10.30 could be compiled with --disable-stack-for-recursion, which caused pcre2_match() to use individual blocks on the heap for @@ -510,6 +509,7 @@ system. The following are installed (file names are all relative to the LICENCE NEWS README + SECURITY pcre2.txt (a concatenation of the man(3) pages) pcre2test.txt the pcre2test man page pcre2grep.txt the pcre2grep man page @@ -607,8 +607,9 @@ zip formats. The command "make distcheck" does the same, but then does a trial build of the new distribution to ensure that it works. If you have modified any of the man page sources in the doc directory, you -should first run the PrepareRelease script before making a distribution. This -script creates the .txt and HTML forms of the documentation from the man pages. +should first run the maint/PrepareRelease script before making a distribution. +This script creates the .txt and HTML forms of the documentation from the man +pages. Testing PCRE2 @@ -822,37 +823,38 @@ The distribution should contain the files listed below. 
ASCII coding; unless --enable-rebuild-chartables is specified, used by copying to pcre2_chartables.c - src/pcre2posix.c ) - src/pcre2_auto_possess.c ) - src/pcre2_chkdint.c ) - src/pcre2_compile.c ) - src/pcre2_config.c ) - src/pcre2_context.c ) - src/pcre2_convert.c ) - src/pcre2_dfa_match.c ) - src/pcre2_error.c ) - src/pcre2_extuni.c ) - src/pcre2_find_bracket.c ) - src/pcre2_jit_compile.c ) - src/pcre2_jit_match.c ) sources for the functions in the library, - src/pcre2_jit_misc.c ) and some internal functions that they use - src/pcre2_maketables.c ) - src/pcre2_match.c ) - src/pcre2_match_data.c ) - src/pcre2_newline.c ) - src/pcre2_ord2utf.c ) - src/pcre2_pattern_info.c ) - src/pcre2_script_run.c ) - src/pcre2_serialize.c ) - src/pcre2_string_utils.c ) - src/pcre2_study.c ) - src/pcre2_substitute.c ) - src/pcre2_substring.c ) - src/pcre2_tables.c ) - src/pcre2_ucd.c ) - src/pcre2_ucptables.c ) - src/pcre2_valid_utf.c ) - src/pcre2_xclass.c ) + src/pcre2posix.c ) + src/pcre2_auto_possess.c ) + src/pcre2_chkdint.c ) + src/pcre2_compile.c ) + src/pcre2_compile_class.c ) + src/pcre2_config.c ) + src/pcre2_context.c ) + src/pcre2_convert.c ) + src/pcre2_dfa_match.c ) + src/pcre2_error.c ) + src/pcre2_extuni.c ) + src/pcre2_find_bracket.c ) + src/pcre2_jit_compile.c ) + src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_misc.c ) and some internal functions that they use + src/pcre2_maketables.c ) + src/pcre2_match.c ) + src/pcre2_match_data.c ) + src/pcre2_newline.c ) + src/pcre2_ord2utf.c ) + src/pcre2_pattern_info.c ) + src/pcre2_script_run.c ) + src/pcre2_serialize.c ) + src/pcre2_string_utils.c ) + src/pcre2_study.c ) + src/pcre2_substitute.c ) + src/pcre2_substring.c ) + src/pcre2_tables.c ) + src/pcre2_ucd.c ) + src/pcre2_ucptables.c ) + src/pcre2_valid_utf.c ) + src/pcre2_xclass.c ) src/pcre2_printint.c debugging function that is used by pcre2test, src/pcre2_fuzzsupport.c function for (optional) fuzzing support @@ -860,13 
+862,16 @@ The distribution should contain the files listed below. src/config.h.in template for config.h, when built by "configure" src/pcre2.h.in template for pcre2.h when built by "configure" src/pcre2posix.h header for the external POSIX wrapper API + src/pcre2_compile.h header for internal use src/pcre2_internal.h header for internal use src/pcre2_intmodedep.h a mode-specific internal header + src/pcre2_jit_char_inc.h header used by JIT src/pcre2_jit_neon_inc.h header used by JIT src/pcre2_jit_simd_inc.h header used by JIT src/pcre2_ucp.h header for Unicode property handling + src/pcre2_util.h header for internal utils - sljit/* source files for the JIT compiler + deps/sljit/sljit_src/* source files for the JIT compiler (B) Source files for programs that use PCRE2: @@ -878,48 +883,49 @@ The distribution should contain the files listed below. (C) Auxiliary files: - 132html script to turn "man" pages into HTML - AUTHORS information about the author of PCRE2 + AUTHORS.md information about the authors of PCRE2 ChangeLog log of changes to the code - CleanTxt script to clean nroff output for txt man pages - Detrail script to remove trailing spaces HACKING some notes about the internals of PCRE2 INSTALL generic installation instructions - LICENCE conditions for the use of PCRE2 + LICENCE.md conditions for the use of PCRE2 COPYING the same, using GNU's standard name + SECURITY.md information on reporting vulnerabilities Makefile.in ) template for Unix Makefile, which is built by ) "configure" Makefile.am ) the automake input that was used to create ) Makefile.in NEWS important changes in this release NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools - PrepareRelease script to make preparations for "make dist" README this file RunTest a Unix shell script for running tests RunGrepTest a Unix shell script for pcre2grep tests + RunTest.bat a Windows batch file for running tests + RunGrepTest.bat a Windows batch file for pcre2grep tests aclocal.m4 m4 
macros (generated by "aclocal") - config.guess ) files used by libtool, - config.sub ) used only when building a shared library + m4/* m4 macros (used by autoconf) configure a configuring shell script (built by autoconf) configure.ac ) the autoconf input that was used to build ) "configure" and config.h - depcomp ) script to find program dependencies, generated by - ) automake doc/*.3 man page sources for PCRE2 doc/*.1 man page sources for pcre2grep and pcre2test - doc/index.html.src the base HTML page doc/html/* HTML documentation doc/pcre2.txt plain text version of the man pages + doc/pcre2-config.txt plain text documentation of pcre2-config script + doc/pcre2grep.txt plain text documentation of grep utility program doc/pcre2test.txt plain text documentation of test program - install-sh a shell script for installing files libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config - ltmain.sh file used to build a libtool script - missing ) common stub for a few missing GNU programs while - ) installing, generated by automake - mkinstalldirs script for making install directories + ar-lib ) + config.guess ) + config.sub ) + depcomp ) helper tools generated by libtool and + compile ) automake, used internally by ./configure + install-sh ) + ltmain.sh ) + missing ) + test-driver ) perltest.sh Script for running a Perl test program pcre2-config.in source of script which retains PCRE2 information testdata/testinput* test data for main library tests @@ -927,12 +933,13 @@ The distribution should contain the files listed below. 
testdata/grep* input and output for pcre2grep tests testdata/* other supporting test files -(D) Auxiliary files for cmake support +(D) Auxiliary files for CMake support cmake/COPYING-CMAKE-SCRIPTS - cmake/FindPackageHandleStandardArgs.cmake cmake/FindEditline.cmake cmake/FindReadline.cmake + cmake/pcre2-config-version.cmake.in + cmake/pcre2-config.cmake.in CMakeLists.txt config-cmake.h.in @@ -943,14 +950,21 @@ The distribution should contain the files listed below. src/config.h.generic ) a version of config.h for use in non-"configure" ) environments -(F) Auxiliary files for building PCRE2 under OpenVMS +(F) Auxiliary files for building PCRE2 using other build systems + + BUILD.bazel ) + MODULE.bazel ) files used by the Bazel build system + WORKSPACE.bazel ) + build.zig file used by zig's build system + +(G) Auxiliary files for building PCRE2 under OpenVMS vms/configure.com ) vms/openvms_readme.txt ) These files were contributed by a PCRE2 user. vms/pcre2.h_patch ) vms/stdint.h ) -Philip Hazel -Email local part: Philip.Hazel -Email domain: gmail.com -Last updated: 15 April 2024 +============================== +Last updated: 18 December 2024 +============================== + diff --git a/RunGrepTest b/RunGrepTest index c382187..396884c 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -25,8 +25,8 @@ unset cp ls mv rm # valgrind settings when requested. builddir=`pwd` -pcre2grep=$builddir/pcre2grep -pcre2test=$builddir/pcre2test +: ${pcre2grep:=$builddir/pcre2grep} +: ${pcre2test:=$builddir/pcre2test} if [ ! -x $pcre2grep ] ; then echo "** $pcre2grep does not exist or is not executable." 
@@ -41,22 +41,17 @@ fi valgrind= while [ $# -gt 0 ] ; do case $1 in - valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file";; + valgrind|-valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file --error-exitcode=70";; *) echo "RunGrepTest: Unknown argument $1"; exit 1;; esac shift done -vjs= pcre2grep_version=`$pcre2grep -V` if [ "$valgrind" = "" ] ; then echo "Testing $pcre2grep_version" else echo "Testing $pcre2grep_version using valgrind" - $pcre2test -C jit >/dev/null - if [ $? -ne 0 ]; then - vjs="--suppressions=./testdata/valgrind-jit.supp" - fi fi # Set up a suitable "diff" command for comparison. Some systems have a diff @@ -105,6 +100,16 @@ if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then fi fi +# Set up the path to the valgrind JIT suppressions + +vjs= +if [ "$valgrind" != "" ] ; then + $pcre2test -C jit >/dev/null + if [ $? -ne 0 ]; then + vjs="--suppressions=`realpath "$srcdir"`/testdata/valgrind-jit.supp" + fi +fi + # Check for the availability of UTF-8 support $pcre2test -C unicode >/dev/null @@ -275,7 +280,7 @@ echo "---------------------------- Test 35 -----------------------------" >>test echo "RC=$?" >>testtrygrep echo "---------------------------- Test 36 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude='grepinput(Bad)?8' --exclude=grepinputM --exclude=grepinputUN --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 37 -----------------------------" >>testtrygrep @@ -318,8 +323,11 @@ echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 46 ------------------------------" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -e 'unopened)' -e abc ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep --regex=123 -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep @@ -530,25 +538,28 @@ echo "---------------------------- Test 95 -----------------------------" >>test echo "RC=$?" >>testtrygrep echo "---------------------------- Test 96 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MC] 'fox' ./test* | sort) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MCU] 'fox' ./test* | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 97 -----------------------------" >>testtrygrep echo "grepinput$" >testtemp1grep echo "grepinput8" >>testtemp1grep -(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "grepinputBad8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 98 -----------------------------" >>testtrygrep echo "grepinput$" >testtemp1grep echo "grepinput8" >>testtemp1grep -(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MC] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "grepinputBad8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 99 -----------------------------" >>testtrygrep echo "grepinput$" >testtemp1grep echo "grepinput8" >testtemp2grep -(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "grepinputBad8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test 100 ------------------------------" >>testtrygrep @@ -618,7 +629,7 @@ echo "---------------------------- Test 115 -----------------------------" >>tes echo "RC=$?" >>testtrygrep echo "---------------------------- Test 116 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MC] -th 'the' testdata/grepinput*) >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MCU] -th 'the' testdata/grepinput*) >>testtrygrep echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 117 -----------------------------" >>testtrygrep @@ -637,6 +648,8 @@ echo "RC=$?" >>testtrygrep echo "---------------------------- Test 120 ------------------------------" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$&:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep echo "RC=$?" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 @@ -761,7 +774,7 @@ echo "---------------------------- Test 140 -----------------------------" >>tes echo "RC=$?" >>testtrygrep echo "---------------------------- Test 141 -----------------------------" >>testtrygrep -printf "$srcdir/testdata/grepinputv\n-\n" >testtemp1grep +printf "%s/testdata/grepinputv\n-\n" "$srcdir" >testtemp1grep printf 'This is a line from stdin.' >testtemp2grep $valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" >testtrygrep 2>&1 echo "RC=$?" >>testtrygrep @@ -842,16 +855,17 @@ echo "RC=$?" >>testtrygrep echo "---------------------------- Test 150 -----------------------------" >>testtrygrep which locale >/dev/null 2>&1 if [ $? -ne 0 ]; then - echo "pcre2grep: Failed to set locale badlocale (obtained from LC_CTYPE)" >>testtrygrep + echo "pcre2grep: Failed to set locale locale.bad (obtained from LC_CTYPE)" >>testtrygrep echo "RC=2" >>testtrygrep else - (cd $srcdir; unset LC_ALL; env LC_CTYPE=badlocale $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1 + (cd $srcdir; unset LC_ALL; LC_CTYPE=locale.bad $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1 echo "RC=$?" 
>>testtrygrep fi echo "---------------------------- Test 151 -----------------------------" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep echo "---------------------------- Test 152 -----------------------------" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --group-separator='++' 'four' ./testdata/grepinputx) >>testtrygrep @@ -861,6 +875,42 @@ echo "---------------------------- Test 153 -----------------------------" >>tes (cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --no-group-separator 'four' ./testdata/grepinputx) >>testtrygrep echo "RC=$?" >>testtrygrep +echo "---------------------------- Test 154 -----------------------------" >>testtrygrep +>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 155 -----------------------------" >>testtrygrep +echo "" >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 156 -----------------------------" >>testtrygrep +echo "" >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file --file $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 157 -----------------------------" >>testtrygrep +echo "spaces " >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --posix-pattern-file --file=$builddir/testtemp1grep ./testdata/grepinputv >$builddir/testtemp2grep && $valgrind $vjs $pcre2grep -q "s " $builddir/testtemp2grep) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 158 -----------------------------" >>testtrygrep +echo "spaces." 
>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 159 -----------------------------" >>testtrygrep +printf "spaces.\r\n" >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file -f$builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 160 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nC3 '^(ert|jkl)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -n -B4 -A2 '^(ert|dfg)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + # Now compare the results. @@ -886,13 +936,11 @@ if [ $utf8 -ne 0 ] ; then echo "RC=$?" >>testtrygrep echo "---------------------------- Test U4 ------------------------------" >>testtrygrep - printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep - (cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' $builddir/testtemp1grep) >>testtrygrep 2>&1 + (cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' ./testdata/grepinputBad8) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test U5 ------------------------------" >>testtrygrep - printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep - (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' ./testdata/grepinputBad8) >>testtrygrep echo "RC=$?" >>testtrygrep echo "---------------------------- Test U6 -----------------------------" >>testtrygrep @@ -935,31 +983,48 @@ printf 'abc\rdef\r\nghi\njkl' >testNinputgrep printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep $valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" 
>>testtrygrep $valgrind $vjs $pcre2grep -B1 -n -N CR "^def" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep $valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep $valgrind $vjs $pcre2grep -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep pattern=`printf 'def\rjkl'` $valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep printf '%c--------------------------- Test N4 ------------------------------\r\n' - >>testtrygrep $valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep $valgrind $vjs $pcre2grep -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep $valgrind $vjs $pcre2grep -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep printf 'xyz\0abc\0def' >testNinputgrep $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep $valgrind $vjs $pcre2grep -B1 -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +printf '%c--------------------------- Test N8 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -na --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep +echo "RC=$?" >>testtrygrep + echo "" >>testtrygrep $cf $srcdir/testdata/grepoutputN testtrygrep @@ -972,8 +1037,13 @@ if [ $utf8 -ne 0 ] ; then echo "Testing pcre2grep newline settings with UTF-8 features" printf '%c--------------------------- Test UN1 ------------------------------\r\n' - >testtrygrep - printf 'abc\341\210\264def\nxyz' >testNinputgrep - $valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" testNinputgrep >>testtrygrep + $valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" $srcdir/testdata/grepinputUN >>testtrygrep + echo "RC=$?" >>testtrygrep + + printf '%c--------------------------- Test UN2 ------------------------------\r\n' - >testtrygrep + $valgrind $vjs $pcre2grep -nauU --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "" >>testtrygrep $cf $srcdir/testdata/grepoutputUN testtrygrep @@ -990,12 +1060,24 @@ fi if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then echo "Testing pcre2grep script callouts" - $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep + echo "--- Test 1 ---" >testtrygrep + $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 2 ---" >>testtrygrep $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" 
>>testtrygrep + echo "--- Test 3 ---" >>testtrygrep $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 4 ---" >>testtrygrep $valgrind $vjs $pcre2grep '(T)(?C"/bin/echo|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 5 ---" >>testtrygrep $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 6 ---" >>testtrygrep $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then nonfork=1 @@ -1010,8 +1092,12 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri if [ $utf8 -ne 0 ] ; then echo "Testing pcre2grep script callout with UTF-8 features" - $valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >testtrygrep + echo "--- Test 1 ---" >testtrygrep + $valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 2 ---" >>testtrygrep $valgrind $vjs $pcre2grep -u '(T)(?C"/bin/echo|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep if [ $nonfork = 1 ] ; then $cf $srcdir/testdata/grepoutputCNU testtrygrep @@ -1019,7 +1105,11 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri $cf $srcdir/testdata/grepoutputCU testtrygrep fi if [ $? 
!= 0 ] ; then exit 1; fi + else + echo "Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library" fi + + unset nonfork else echo "Script callouts are not supported" fi diff --git a/RunGrepTest.bat b/RunGrepTest.bat index 4a095a3..b7ee629 100644 --- a/RunGrepTest.bat +++ b/RunGrepTest.bat @@ -19,8 +19,9 @@ set GREP_COLOR= :: Remember the current (build) directory and set the program to be tested. set builddir="%CD%" -set pcre2grep=%builddir%\pcre2grep.exe -set pcre2test=%builddir%\pcre2test.exe + +if [%pcre2grep%]==[] set pcre2grep=%builddir%\pcre2grep.exe +if [%pcre2test%]==[] set pcre2test=%builddir%\pcre2test.exe if NOT exist %pcre2grep% ( echo ** %pcre2grep% does not exist. @@ -81,11 +82,16 @@ if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" ( ) :: Create a simple printf via cscript/JScript (an actual printf may translate -:: LF to CRLF, which this one does not). +:: LF to CRLF, which this one does not). We only support the barebones we need: +:: \r, \n, \0, and %s (but only once). -echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n")) >printf.js +echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n").replace(/\\0/g, "\x00").replace(/%%s/g, function() { return WScript.Arguments(1) })) >printf.js set printf=cscript //nologo printf.js +:: Create a simple 'tr' via cscript/JScript. +echo WScript.StdOut.Write(WScript.StdIn.ReadAll().replace(/\x00/g, "@")) >trnull.js +set trnull=cscript //nologo trnull.js + :: ------ Normal tests ------ echo Testing pcre2grep main features @@ -232,7 +238,7 @@ echo ---------------------------- Test 35 ----------------------------->>testtry echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 36 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude "grepinput$" --exclude=grepinput8 --exclude-dir="^\." 
"fox" ./testdata | sort & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include="grepinput[^C]" --exclude "grepinput$" --exclude="grepinput(Bad)?8" --exclude=grepinputM --exclude=grepinputUN --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 37 ----------------------------->>testtrygrep @@ -274,8 +280,14 @@ echo ---------------------------- Test 45 ------------------------------>>testtr echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 46 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -e "unopened)" -e abc ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep (pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex=123 -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 47 ------------------------------>>testtrygrep (pushd %srcdir% & %pcre2grep% -Fx AB.VE^ @@ -320,11 +332,11 @@ echo ---------------------------- Test 55 ----------------------------->>testtry echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 56 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -c lazy ./testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -c --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 57 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -c -l lazy ./testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -c -l --exclude=grepinputC lazy 
./testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 58 ----------------------------->>testtrygrep @@ -378,6 +390,12 @@ echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 70 ----------------------------->>testtrygrep (pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 71 ----------------------------->>testtrygrep (pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep @@ -481,25 +499,28 @@ echo ---------------------------- Test 95 ----------------------------->>testtry echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 96 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" "fox" ./test* | sort & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" --exclude=grepinput[MCU] "fox" ./test* | sort & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 97 ----------------------------->>testtrygrep echo grepinput$>testtemp1grep echo grepinput8>>testtemp1grep -(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." 
"fox" ./testdata | sort & popd) >>testtrygrep +echo grepinputBad8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 98 ----------------------------->>testtrygrep echo grepinput$>testtemp1grep echo grepinput8>>testtemp1grep -(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo grepinputBad8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 99 ----------------------------->>testtrygrep echo grepinput$>testtemp1grep echo grepinput8>testtemp2grep -(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo grepinputBad8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." 
"fox" ./testdata | sort & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 100 ------------------------------>>testtrygrep @@ -533,7 +554,7 @@ echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 107 ----------------------------->>testtrygrep echo a>testtemp1grep echo aaaaa>>testtemp1grep -(pushd %srcdir% & %pcre2grep% --line-offsets "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1 +(pushd %srcdir% & %pcre2grep% --line-offsets --allow-lookaround-bsk "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1 echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 108 ------------------------------>>testtrygrep @@ -541,7 +562,7 @@ echo ---------------------------- Test 108 ------------------------------>>testt echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 109 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -cq lazy ./testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -cq --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 110 ----------------------------->>testtrygrep @@ -557,27 +578,27 @@ echo ---------------------------- Test 112 ----------------------------->>testtr echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 113 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% --total-count "the" testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% --total-count --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 114 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -tc "the" testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -tc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep echo 
RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 115 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -tlc "the" testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -tlc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 116 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -th "the" testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% --exclude=grepinput[MCU] -th "the" testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 117 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -tch "the" testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -tch --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 118 ----------------------------->>testtrygrep -(pushd %srcdir% & %pcre2grep% -tL "the" testdata/grepinput* & popd) >>testtrygrep +(pushd %srcdir% & %pcre2grep% -tL --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 119 ----------------------------->>testtrygrep @@ -588,6 +609,266 @@ echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test 120 ------------------------------>>testtrygrep (pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "$&:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -m 1 -O "$0:$a$b$e$f$r$t$v" "(\w+) binary (\w+)(\.)?" 
./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "${X}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "XX$" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -O "$x{12345678}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -O "$x{123Z" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --output "$x{1234}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 121 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -F "\E and (regex)" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 122 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -w "cat|dog" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 123 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -w "dog|cat" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 124 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn --colour=always "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn --colour=always -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo 
RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 125 ----------------------------->>testtrygrep +%printf% "abcd\n" >testNinputgrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K.)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?=.\K)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K[ac])" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +set GREP_COLORS=ms=1;20 +%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep +set GREP_COLORS= +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 126 ----------------------------->>testtrygrep +%printf% "Next line pattern has binary zero\nABC\0XYZ\n" >testtemp1grep +%printf% "ABC\0XYZ\nABCDEF\nDEFABC\n" >testtemp2grep +%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%printf% "Next line pattern is erroneous.\n^abc)(xy" >testtemp1grep +%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 127 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 128 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -m1M -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 129 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -m 2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 130 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 131 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -oc -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 132 ----------------------------->>testtrygrep +:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel +:: level, are not even numbered). Use a subshell instead. +(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& head -1) >testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 133 ----------------------------->>testtrygrep +:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel +:: level, are not even numbered). Use a subshell instead. 
+(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& %pcre2grep% -m1 -A3 "^match") >testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 134 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --max-count=1 -nH -O "=$x{41}$x423$o{103}$o1045=" "fox" - & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 135 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -lZ "word" ./testdata/grepinputv ./testdata/grepinputv & popd) | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -A 1 -B 1 -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -MHZn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 136 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -m1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --max-count=1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 137 ----------------------------->>testtrygrep +%printf% "Last line\nhas no newline" >testtemp1grep +%pcre2grep% -A1 Last testtemp1grep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 138 ----------------------------->>testtrygrep +%printf% "AbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\n" >testtemp1grep +%pcre2grep% --no-jit --heap-limit=0 b testtemp1grep 
>>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 139 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-buffered "fox" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 140 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --buffer-size=10 -A1 "brown" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 141 ----------------------------->>testtrygrep +%printf% "%%s\testdata\grepinputv\n-\n" "%srcdir%" >testtemp1grep +%printf% "This is a line from stdin." >testtemp2grep +%pcre2grep% --file-list testtemp1grep "line from stdin" >testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 142 ----------------------------->>testtrygrep +%printf% "/does/not/exist\n" >testtemp1grep +%printf% "This is a line from stdin." >testtemp2grep +%pcre2grep% --file-list testtemp1grep "line from stdin" >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 143 ----------------------------->>testtrygrep +%printf% "fox|cat" >testtemp1grep +%pcre2grep% -f - %srcdir%\testdata\grepinputv >testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 144 ----------------------------->>testtrygrep +%pcre2grep% -f /non/exist %srcdir%\testdata\grepinputv >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 145 ----------------------------->>testtrygrep +%printf% "*meta*\rdog." 
>testtemp1grep +%pcre2grep% -Ncr -F -f testtemp1grep %srcdir%\testdata\grepinputv >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 146 ----------------------------->>testtrygrep +%printf% "A123B" >testtemp1grep +%pcre2grep% -H -e "123|fox" - >testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -h -e "123|fox" - %srcdir%\testdata\grepinputv >testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% - %srcdir%\testdata\grepinputv >testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 147 ----------------------------->>testtrygrep +%pcre2grep% -e "123|fox" -- -nonfile >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 148 ----------------------------->>testtrygrep +%pcre2grep% --nonexist >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -n-n-bad >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --context >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --only-matching --output=xx >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --newline=badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -d badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -D badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --buffer-size=0 >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --exclude "(badpat" abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --exclude-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --include-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --file-list=/non/exist abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + 
+echo ---------------------------- Test 149 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=binary "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=wrong "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 150 ----------------------------->>testtrygrep +:: The Unix version of this tests checks for whether locales are supported. On Windows, +:: we assume they always are. +set LC_ALL= +set LC_CTYPE=locale.bad +(pushd %srcdir% & %pcre2grep% abc /dev/null & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +set LC_CTYPE= + +echo ---------------------------- Test 151 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always -e this -e The -e "The wo" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 152 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nA3 --group-separator="++" "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 153 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nA3 --no-group-separator "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 154 ----------------------------->>testtrygrep +echo. >nul 2>testtemp1grep +(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 155 ----------------------------->>testtrygrep +echo. 
>testtemp1grep +(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 156 ----------------------------->>testtrygrep +%printf% "\n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% --posix-pattern-file --file %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 157 ----------------------------->>testtrygrep +%printf% "spaces \n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% -o --posix-pattern-file --file=%builddir%\testtemp1grep ./testdata/grepinputv >%builddir%\testtemp2grep && %pcre2grep% -q "s " %builddir%\testtemp2grep & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 158 ----------------------------->>testtrygrep +%printf% "spaces.\n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 159 ----------------------------->>testtrygrep +%printf% "spaces.\r\n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% --posix-pattern-file -f%builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 160 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nC3 "^(ert|jkl)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -n -B4 -A2 "^(ert|dfg)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep :: Now compare the results. 
@@ -602,15 +883,43 @@ if %utf8% neq 0 ( echo ---------------------------- Test U1 ------------------------------>testtrygrep (pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep - echo RC=^%ERRORLEVEL%>>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep echo ---------------------------- Test U2 ------------------------------>>testtrygrep (pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep - echo RC=^%ERRORLEVEL%>>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep echo ---------------------------- Test U3 ------------------------------>>testtrygrep - (pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep - echo RC=^%ERRORLEVEL%>>testtrygrep + (pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any --allow-lookaround-bsk "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U4 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -u -o "...." ./testdata/grepinputBad8 & popd) >>testtrygrep 2>&1 + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U5 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -U -o "...." 
./testdata/grepinputBad8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U6 ----------------------------->>testtrygrep + (pushd %srcdir% & %pcre2grep% -u -m1 -O "=$x{1d3}$o{744}=" "fox" & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1 + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U7 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -ui --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U8 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -UiEP --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U9 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -u --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U10 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -u --posix-digit --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep %cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout% if ERRORLEVEL 1 exit /b 1 @@ -631,58 +940,159 @@ echo Testing pcre2grep newline settings echo ---------------------------- Test N1 ------------------------------>testtrygrep %pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n -N CR "^def" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test N2 ------------------------------>>testtrygrep %pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep +echo 
RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test N3 ------------------------------>>testtrygrep for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a %pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test N4 ------------------------------>>testtrygrep -%pcre2grep% -n --newline=crlf -F -f %srcdir%/testdata/greppatN4 testNinputgrep >>testtrygrep +%pcre2grep% -n --newline=crlf -F -f %srcdir%\testdata\greppatN4 testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test N5 ------------------------------>>testtrygrep %pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep echo ---------------------------- Test N6 ------------------------------>>testtrygrep %pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N7 ------------------------------>>testtrygrep +%printf% "xyz\0abc\0def" >testNinputgrep +%pcre2grep% -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N8 ------------------------------>>testtrygrep +%pcre2grep% -na --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +%printf% "\n" >>testtrygrep %cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout% if ERRORLEVEL 1 exit /b 1 -:: If pcre2grep supports script callouts, run some tests on them. 
+ +:: These newline tests need UTF support. + +if %utf8% neq 0 ( + echo Testing pcre2grep newline settings with UTF-8 features + + echo ---------------------------- Test UN1 ------------------------------>testtrygrep + %pcre2grep% -nau --newline=anycrlf "^(abc|def)" %srcdir%\testdata\grepinputUN >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test UN2 ------------------------------>testtrygrep + %pcre2grep% -nauU --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + %printf% "\n" >>testtrygrep + + %cf% %srcdir%\testdata\grepoutputUN testtrygrep %cfout% + if ERRORLEVEL 1 exit /b 1 + +) else ( + echo Skipping pcre2grep newline UTF-8 tests: no UTF-8 support in PCRE2 library +) + + +:: If pcre2grep supports script callouts, run some tests on them. It is possible +:: to restrict these callouts to the non-fork case, either for security, or for +:: environments that do not support fork(). This is handled by comparing to a +:: different output. 
%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported" if %ERRORLEVEL% equ 0 ( echo Testing pcre2grep script callouts - %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep - %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep - %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep - %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep - %pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported" - if %ERRORLEVEL% equ 0 ( + + echo --- Test 1 --->testtrygrep + %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 2 --->>testtrygrep + %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 3 --->>testtrygrep + %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 4 --->>testtrygrep + %pcre2grep% "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 5 --->>testtrygrep + %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 6 --->>testtrygrep + %pcre2grep% -m1 "(T)(?C'|$0:$1:$x{41}$o{101}$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + %pcre2grep% --help | %pcre2grep% -q "Non-fork callout scripts in patterns are supported" + if ^!ERRORLEVEL! 
equ 0 ( + set nonfork=1 %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout% ) else ( + set nonfork=0 %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% ) if ERRORLEVEL 1 exit /b 1 + + @REM These callout tests need UTF support. + + if %utf8% neq 0 ( + echo Testing pcre2grep script callout with UTF-8 features + + echo --- Test 1 --->testtrygrep + %pcre2grep% -u "(T)(?C'|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 2 --->>testtrygrep + %pcre2grep% -u "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + if ^!nonfork! equ 1 ( + %cf% %srcdir%\testdata\grepoutputCNU testtrygrep %cfout% + ) else ( + %cf% %srcdir%\testdata\grepoutputCU testtrygrep %cfout% + ) + if ERRORLEVEL 1 exit /b 1 + + ) else ( + echo Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library + ) + ) else ( echo Script callouts are not supported ) + :: Finally, some tests to exercise code that is not tested above, just to be :: sure that it runs OK. Doing this improves the coverage statistics. The output :: is not checked. echo Testing miscellaneous pcre2grep arguments (unchecked) -%printf% "" >testtrygrep +echo. >nul 2>testtrygrep call :checkspecial "-xxxxx" 2 || exit /b 1 call :checkspecial "--help" 0 || exit /b 1 call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1 +call :checkspecial "--line-buffered --color abc nul" 1 || exit /b 1 +call :checkspecial "-dskip abc ." 
1 || exit /b 1 +call :checkspecial "-Dread -Dskip abc nul" 1 || exit /b 1 + :: Clean up local working files -del testcf printf.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep +del testcf printf.js trnull.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep exit /b 0 diff --git a/RunTest b/RunTest index d426a59..dafef3e 100755 --- a/RunTest +++ b/RunTest @@ -88,8 +88,9 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)" title23="Test 23: \C disabled test" title24="Test 24: Non-UTF pattern conversion tests" title25="Test 25: UTF pattern conversion tests" -title26="Test 26: Auto-generated unicode property tests" -maxtest=26 +title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)" +title27="Test 27: Auto-generated unicode property tests" +maxtest=27 titleheap="Test 'heap': Environment-specific heap tests" if [ $# -eq 1 -a "$1" = "list" ]; then @@ -120,6 +121,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then echo $title24 echo $title25 echo $title26 + echo $title27 echo "" echo $titleheap echo "" @@ -183,7 +185,7 @@ checkresult() checkspecial() { - $valgrind $vjs ./pcre2test $1 >>testtry + $sim $valgrind $vjs $pcre2test $1 >>testtry if [ $? -ne 0 ] ; then echo "** pcre2test $1 failed - check testtry" exit 1 @@ -191,24 +193,7 @@ checkspecial() } -# ------ Special EBCDIC Test ------- - -if [ $# -eq 1 -a "$1" = "ebcdic" ]; then - $valgrind ./pcre2test -C ebcdic >/dev/null - ebcdic=$? - if [ $ebcdic -ne 1 ] ; then - echo "Cannot run EBCDIC tests: EBCDIC support not compiled" - exit 1 - fi - for opt in "" "-dfa"; do - ./pcre2test -q $opt $testdata/testinputEBC >testtry - checkresult $? EBC "$opt" - done -exit 0 -fi - - -# ------ Normal Tests ------ +# ------ Test setup ------ # Default values @@ -221,10 +206,16 @@ sim= skip= valgrind= vjs= +: ${pcre2test:=./pcre2test} # This is in case the caller has set aliases (as I do - PH) unset cp ls mv rm +if [ ! 
-x $pcre2test ] ; then + echo "** $pcre2test does not exist or is not executable." + exit 1 +fi + # Process options and select which tests to run; for those that are explicitly # requested, check that the necessary optional facilities are available. @@ -255,7 +246,9 @@ do23=no do24=no do25=no do26=no +do27=no doheap=no +doebcdic=no while [ $# -gt 0 ] ; do case $1 in @@ -286,14 +279,16 @@ while [ $# -gt 0 ] ; do 24) do24=yes;; 25) do25=yes;; 26) do26=yes;; - heap) doheap=yes;; - -8) arg8=yes;; + 27) do27=yes;; + heap) doheap=yes;; + ebcdic) doebcdic=yes;; + -8) arg8=yes;; -16) arg16=yes;; -32) arg32=yes;; bigstack|-bigstack) bigstack=yes;; nojit|-nojit) nojit=yes;; sim|-sim) shift; sim=$1;; - valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file";; + valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --error-exitcode=70";; valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";; ~*) if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then @@ -325,7 +320,7 @@ done # Find which optional facilities are available. -$sim ./pcre2test -C linksize >/dev/null +$sim $pcre2test -C linksize >/dev/null link_size=$? if [ $link_size -lt 2 ] ; then echo "RunTest: Failed to find internal link size" @@ -339,10 +334,10 @@ fi # If it is possible to set the system stack size and -bigstack was given, # set up a large stack. -$sim ./pcre2test -S 64 /dev/null /dev/null +$sim $pcre2test -S 32 /dev/null /dev/null support_setstack=$? if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then - setstack="-S 64" + setstack="-S 32" else setstack="" fi @@ -350,16 +345,16 @@ fi # All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only # one need be. -$sim ./pcre2test -C pcre2-8 >/dev/null +$sim $pcre2test -C pcre2-8 >/dev/null support8=$? 
-$sim ./pcre2test -C pcre2-16 >/dev/null +$sim $pcre2test -C pcre2-16 >/dev/null support16=$? -$sim ./pcre2test -C pcre2-32 >/dev/null +$sim $pcre2test -C pcre2-32 >/dev/null support32=$? # \C may be disabled -$sim ./pcre2test -C backslash-C >/dev/null +$sim $pcre2test -C backslash-C >/dev/null supportBSC=$? # Initialize all bitsizes skipped @@ -411,7 +406,7 @@ fi # sizes if both are supported; we can't have UTF-8 support without UTF-16 or # UTF-32 support. -$sim ./pcre2test -C unicode >/dev/null +$sim $pcre2test -C unicode >/dev/null utf=$? # When JIT is used with valgrind, we need to set up valgrind suppressions as @@ -419,7 +414,7 @@ utf=$? # the hardware supports SSE2. jitopt= -$sim ./pcre2test -C jit >/dev/null +$sim $pcre2test -C jit >/dev/null jit=$? if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then jitopt=-jit @@ -437,7 +432,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \ $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \ $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \ - $do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \ + $do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \ + $doheap = no -a $doebcdic = no \ ]; then do0=yes do1=yes @@ -466,6 +462,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ do24=yes do25=yes do26=yes + do27=yes fi # Handle any explicit skips at this stage, so that an argument list may consist @@ -477,9 +474,12 @@ for i in $skip; do eval do$i=no; done echo "" echo PCRE2 C library tests using test data from $testdata -$sim ./pcre2test /dev/null +$sim $pcre2test /dev/null echo "" + +# ------ Normal Tests ------ + for bmode in "$test8" "$test16" "$test32"; do case "$bmode" in skip) continue;; @@ -512,7 +512,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $do1 = yes ] ; then echo $title1 for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry + $sim 
$valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry checkresult $? 1 "$opt" done fi @@ -524,10 +524,10 @@ for bmode in "$test8" "$test16" "$test32"; do echo $title2 "(excluding UTF-$bits)" cp $testdata/testbtables . for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry saverc=$? if [ $saverc = 0 ] ; then - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,300 >>testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $bmode $opt -error -80,-62,-2,-1,0,100,101,191,300 >>testtry checkresult $? 2 "$opt" else checkresult $saverc 2 "$opt" @@ -553,7 +553,7 @@ for bmode in "$test8" "$test16" "$test32"; do locale -a | grep "^$loc\$" >/dev/null if [ $? -eq 0 ] ; then echo "/a/locale=$loc" | \ - $sim $valgrind ./pcre2test -q $bmode | \ + $sim $valgrind $pcre2test -q $bmode | \ grep "Failed to set locale" >/dev/null if [ $? -ne 0 ] ; then locale=$loc @@ -580,7 +580,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ "$locale" != "" ] ; then echo $title3 "(using '$locale' locale)" for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $infile testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $infile testtry if [ $? = 0 ] ; then case "$opt" in -jit) with=" with JIT";; @@ -617,7 +617,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry checkresult $? 
4 "$opt" done fi @@ -629,7 +629,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry checkresult $? 5 "$opt" done fi @@ -639,7 +639,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $do6 = yes ] ; then echo $title6 - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput6 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput6 testtry checkresult $? 6 "" fi @@ -648,7 +648,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $setstack $bmode $opt $testdata/testinput7 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinput7 testtry checkresult $? 7 "" fi fi @@ -666,7 +666,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput8 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput8 testtry checkresult $? 8-$bits-$link_size "" fi fi @@ -679,7 +679,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped when running 16/32-bit tests" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry checkresult $? 
9 "$opt" done fi @@ -695,7 +695,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry checkresult $? 10 "$opt" done fi @@ -709,7 +709,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped when running 8-bit tests" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry checkresult $? 11-$bits "$opt" done fi @@ -726,7 +726,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry checkresult $? 12-$bits "$opt" done fi @@ -739,7 +739,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ "$bits" = "8" ] ; then echo " Skipped when running 8-bit tests" else - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput13 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput13 testtry checkresult $? 13 "" fi fi @@ -751,7 +751,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $setstack $bmode $opt $testdata/testinput14 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinput14 testtry checkresult $? 
14-$bits "" fi fi @@ -760,7 +760,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $do15 = yes ] ; then echo $title15 - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput15 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput15 testtry checkresult $? 15 "" fi @@ -771,7 +771,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $jit -ne 0 ] ; then echo " Skipped because JIT is available" else - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput16 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput16 testtry checkresult $? 16 "" fi fi @@ -783,7 +783,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then echo " Skipped because JIT is not available or nojit was specified" else - $sim $valgrind $vjs ./pcre2test -q $setstack $bmode $testdata/testinput17 testtry + $sim $valgrind $vjs $pcre2test -q $setstack $bmode $testdata/testinput17 testtry checkresult $? 17 "" fi fi @@ -795,7 +795,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ "$bits" = "16" -o "$bits" = "32" ] ; then echo " Skipped when running 16/32-bit tests" else - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput18 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput18 testtry checkresult $? 18 "" fi fi @@ -809,7 +809,7 @@ for bmode in "$test8" "$test16" "$test32"; do elif [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput19 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput19 testtry checkresult $? 19 "" fi fi @@ -818,7 +818,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $do20 = yes ] ; then echo $title20 - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput20 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput20 testtry checkresult $? 
20 "" fi @@ -830,7 +830,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because \C is disabled" else for opt in "" $jitopt -dfa; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry checkresult $? 21 "$opt" done fi @@ -846,7 +846,7 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry checkresult $? 22-$bits "$opt" done fi @@ -859,7 +859,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $supportBSC -ne 0 ] ; then echo " Skipped because \C is not disabled" else - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput23 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput23 testtry checkresult $? 23 "" fi fi @@ -868,7 +868,7 @@ for bmode in "$test8" "$test16" "$test32"; do if [ "$do24" = yes ] ; then echo $title24 - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput24 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput24 testtry checkresult $? 24 "" fi @@ -879,12 +879,12 @@ for bmode in "$test8" "$test16" "$test32"; do if [ $utf -eq 0 ] ; then echo " Skipped because UTF-$bits support is not available" else - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput25 testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput25 testtry checkresult $? 
25 "" fi fi - # Auto-generated unicode property tests + # Unicode property tests if [ $do26 = yes ] ; then echo $title26 @@ -892,24 +892,55 @@ for bmode in "$test8" "$test16" "$test32"; do echo " Skipped because UTF-$bits support is not available" else for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry checkresult $? 26 "$opt" done fi fi + # Auto-generated Unicode property tests + + if [ $do27 = yes ] ; then + echo $title27 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput27 testtry + checkresult $? 27 "$opt" + done + fi + fi + # Manually selected heap tests - output may vary in different environments, # which is why that are not automatically run. if [ $doheap = yes ] ; then echo $titleheap - $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinputheap testtry checkresult $? heap-$bits "" fi # End of loop for 8/16/32-bit tests done + +# ------ Special EBCDIC Test ------- + +if [ $doebcdic = yes ] ; then + $sim $valgrind $pcre2test -C ebcdic >/dev/null + ebcdic=$? + if [ $ebcdic -ne 1 ] ; then + echo "Cannot run EBCDIC tests: EBCDIC support not compiled" + exit 1 + fi + for opt in "" "-dfa"; do + $sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry + checkresult $? EBC "$opt" + done +fi + + # Clean up local working files rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry diff --git a/RunTest.bat b/RunTest.bat index 9f203fe..67e5202 100644 --- a/RunTest.bat +++ b/RunTest.bat @@ -13,7 +13,7 @@ @rem line. Added argument validation and added error reporting. 
@rem @rem Sheri Pierce added logic to skip feature dependent tests -@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support +@rem tests 4 5 7 10 12 14 19 22 25 and 26 require Unicode support @rem 8 requires Unicode and link size 2 @rem 16 requires absence of jit support @rem 17 requires presence of jit support @@ -27,8 +27,8 @@ @rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015. @rem PH added missing "set type" for test 22, April 2016. @rem PH added copy command for new testbtables file, November 2020 -@rem PH caused it to show comparison output when comparison faile, July 2023 -@rem PH updated unknown error number in test +@rem PH caused it to show comparison output when comparison failed, July 2023 +@rem PH updated unknown error number in test setlocal enabledelayedexpansion @@ -39,7 +39,7 @@ if exist ..\testdata\ set srcdir=..) if [%srcdir%]==[] ( if exist ..\..\testdata\ set srcdir=..\..) if NOT exist %srcdir%\testdata\ ( -Error: echo distribution testdata folder not found! +echo Error: distribution testdata folder not found! call :conferror exit /b 1 goto :eof @@ -82,7 +82,7 @@ if not exist testout16 md testout16 if not exist testoutjit16 md testoutjit16 ) -if %support16% EQU 1 ( +if %support32% EQU 1 ( if not exist testout32 md testout32 if not exist testoutjit32 md testoutjit32 ) @@ -110,20 +110,24 @@ set do20=no set do21=no set do22=no set do23=no +set do24=no +set do25=no +set do26=no +set do27=no set all=yes for %%a in (%*) do ( set valid=no - for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes + for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27) do if %%v == %%a set valid=yes if "!valid!" == "yes" ( set do%%a=yes set all=no -) else ( + ) else ( echo Invalid test number - %%a! - echo Usage %0 [ test_number ] ... - echo Where test_number is one or more optional test numbers 1 through 23, default is all tests. 
- exit /b 1 -) + echo Usage %0 [ test_number ] ... + echo Where test_number is one or more optional test numbers 1 through 27, default is all tests. + exit /b 1 + ) ) set failed="no" @@ -137,9 +141,9 @@ if "%all%" == "yes" ( set do7=yes set do8=yes set do9=yes - set do10=no + set do10=yes set do11=yes - set do12=no + set do12=yes set do13=yes set do14=yes set do15=yes @@ -151,6 +155,10 @@ if "%all%" == "yes" ( set do21=yes set do22=yes set do23=yes + set do24=yes + set do25=yes + set do26=yes + set do27=yes ) @echo RunTest.bat's pcre2test output is written to newly created subfolders @@ -202,6 +210,10 @@ if "%do20%" == "yes" call :do20 if "%do21%" == "yes" call :do21 if "%do22%" == "yes" call :do22 if "%do23%" == "yes" call :do23 +if "%do24%" == "yes" call :do24 +if "%do25%" == "yes" call :do25 +if "%do26%" == "yes" call :do26 +if "%do27%" == "yes" call :do27 :modeSkip if "%mode%" == "" ( set mode=-16 @@ -247,7 +259,15 @@ if [%3] == [] ( ) if %1 == 8 ( - set outnum=8-%bits%-%link_size% + set outnum=%1-%bits%-%link_size% +) else if %1 == 11 ( + set outnum=%1-%bits% +) else if %1 == 12 ( + set outnum=%1-%bits% +) else if %1 == 14 ( + set outnum=%1-%bits% +) else if %1 == 22 ( + set outnum=%1-%bits% ) else ( set outnum=%1 ) @@ -266,24 +286,10 @@ if errorlevel 1 ( set failed="yes" goto :eof ) else if [%1]==[2] ( - %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput% + %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -80,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput% ) -set type= -if [%1]==[11] ( - set type=-%bits% -) -if [%1]==[12] ( - set type=-%bits% -) -if [%1]==[14] ( - set type=-%bits% -) -if [%1]==[22] ( - set type=-%bits% -) - -fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL +fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% >NUL if errorlevel 1 ( echo. failed comparison: fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% @@ -294,7 +300,7 @@ if errorlevel 1 ( echo. 
goto :eof ) - fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% + fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% set failed="yes" goto :eof @@ -309,7 +315,7 @@ if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit goto :eof :do2 - copy /y %srcdir%\testdata\testbtables testbtables + copy /y %srcdir%\testdata\testbtables testbtables call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit goto :eof @@ -504,6 +510,36 @@ if %supportBSC% EQU 1 ( call :runsub 23 testout "Backslash-C disabled test" -q goto :eof +:do24 +call :runsub 24 testout "Non-UTF pattern conversion tests" -q +goto :eof + +:do25 +if %unicode% EQU 0 ( + echo Test 25 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 25 testout "UTF pattern conversion tests" -q +goto :eof + +:do26 +if %unicode% EQU 0 ( + echo Test 26 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 26 testout "Unicode property tests (Compatible with Perl >= 5.38)" -q + if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do27 +if %unicode% EQU 0 ( + echo Test 27 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 27 testout "Auto-generated unicode property tests" -q + if %jit% EQU 1 call :runsub 27 testoutjit "Test with JIT Override" -q -jit +goto :eof + :conferror @echo. @echo Either your build is incomplete or you have a configuration error. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..1e3a05b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,44 @@ +# Security policies + +## Release security + +The PCRE2 project provides source-only releases, with no binaries. + +These source releases can be downloaded from the +[GitHub Releases](https://github.com/PCRE2Project/pcre2/releases) page. Each +release file is GPG-signed. 
+ +* Releases up to and including 10.44 are signed by Philip Hazel (GPG key: + 45F68D54BBE23FB3039B46E59766E084FB0F43D8) +* Releases from 10.45 onwards will be signed by Nicholas Wilson (GPG key: + A95536204A3BB489715231282A98E77EB6F24CA8, cross-signed by Philip + Hazel's key for release continuity) + +From releases 10.45 onwards, the source code will additionally be provided via +Git checkout of the (GPG-signed) release tag. + +Please contact the maintainers for any queries about release integrity or the +project's supply-chain. + +## Reporting vulnerabilities + +The PCRE2 project prioritises security. We appreciate third-party testing and +security research, and would be grateful if you could responsibly disclose your +findings to us. We will make every effort to acknowledge your contributions. + +To report a security issue, please use the GitHub Security Advisory +["Report a Vulnerability"](https://github.com/PCRE2Project/pcre2/security/advisories/new) +tab. (Alternatively, if you prefer you may send a GPG-encrypted email to one of +the maintainers.) + +### Timeline + +As a very small volunteer team, we cannot guarantee rapid response, but would +aim to respond within 1 week, or perhaps 2 during holidays. + +### Response procedure + +PCRE2 has never previously made a rapid or embargoed release in response to a +security incident. We would work with security managers from trusted downstream +distributors, such as major Linux distributions, before disclosing the +vulnerability publicly. 
diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel new file mode 100644 index 0000000..4ce2c8c --- /dev/null +++ b/WORKSPACE.bazel @@ -0,0 +1 @@ +# See MODULE.bazel diff --git a/aclocal.m4 b/aclocal.m4 index db0861a..d307cf7 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -14,8 +14,8 @@ m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl -m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.72],, -[m4_warning([this file was generated for autoconf 2.72. +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.71],, +[m4_warning([this file was generated for autoconf 2.71. You have another version of autoconf. It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..4cb6d99 --- /dev/null +++ b/build.zig @@ -0,0 +1,173 @@ +const std = @import("std"); + +pub const CodeUnitWidth = enum { + @"8", + @"16", + @"32", +}; + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + const linkage = b.option(std.builtin.LinkMode, "linkage", "whether to statically or dynamically link the library") orelse @as(std.builtin.LinkMode, if (target.result.isGnuLibC()) .dynamic else .static); + const codeUnitWidth = b.option(CodeUnitWidth, "code-unit-width", "Sets the code unit width") orelse .@"8"; + + const pcre2_header_dir = b.addWriteFiles(); + const pcre2_header = pcre2_header_dir.addCopyFile(b.path("src/pcre2.h.generic"), "pcre2.h"); + + const config_header = b.addConfigHeader( + .{ + .style = .{ .cmake = b.path("config-cmake.h.in") }, + .include_path = "config.h", + }, + .{ + .HAVE_ASSERT_H = true, + .HAVE_UNISTD_H = (target.result.os.tag != .windows), + 
.HAVE_WINDOWS_H = (target.result.os.tag == .windows), + + .HAVE_MEMMOVE = true, + .HAVE_STRERROR = true, + + .SUPPORT_PCRE2_8 = codeUnitWidth == CodeUnitWidth.@"8", + .SUPPORT_PCRE2_16 = codeUnitWidth == CodeUnitWidth.@"16", + .SUPPORT_PCRE2_32 = codeUnitWidth == CodeUnitWidth.@"32", + .SUPPORT_UNICODE = true, + + .PCRE2_EXPORT = null, + .PCRE2_LINK_SIZE = 2, + .PCRE2_HEAP_LIMIT = 20000000, + .PCRE2_MATCH_LIMIT = 10000000, + .PCRE2_MATCH_LIMIT_DEPTH = "MATCH_LIMIT", + .PCRE2_MAX_VARLOOKBEHIND = 255, + .NEWLINE_DEFAULT = 2, + .PCRE2_PARENS_NEST_LIMIT = 250, + }, + ); + + // pcre2-8/16/32.so + + const lib = std.Build.Step.Compile.create(b, .{ + .name = b.fmt("pcre2-{s}", .{@tagName(codeUnitWidth)}), + .root_module = .{ + .target = target, + .optimize = optimize, + .link_libc = true, + }, + .kind = .lib, + .linkage = linkage, + }); + + lib.defineCMacro("HAVE_CONFIG_H", null); + lib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth)); + if (linkage == .static) { + lib.defineCMacro("PCRE2_STATIC", null); + } + + lib.addConfigHeader(config_header); + lib.addIncludePath(pcre2_header_dir.getDirectory()); + lib.addIncludePath(b.path("src")); + + lib.addCSourceFile(.{ + .file = b.addWriteFiles().addCopyFile(b.path("src/pcre2_chartables.c.dist"), "pcre2_chartables.c"), + }); + + lib.addCSourceFiles(.{ + .files = &.{ + "src/pcre2_auto_possess.c", + "src/pcre2_chkdint.c", + "src/pcre2_compile.c", + "src/pcre2_compile_class.c", + "src/pcre2_config.c", + "src/pcre2_context.c", + "src/pcre2_convert.c", + "src/pcre2_dfa_match.c", + "src/pcre2_error.c", + "src/pcre2_extuni.c", + "src/pcre2_find_bracket.c", + "src/pcre2_jit_compile.c", + "src/pcre2_maketables.c", + "src/pcre2_match.c", + "src/pcre2_match_data.c", + "src/pcre2_newline.c", + "src/pcre2_ord2utf.c", + "src/pcre2_pattern_info.c", + "src/pcre2_script_run.c", + "src/pcre2_serialize.c", + "src/pcre2_string_utils.c", + "src/pcre2_study.c", + "src/pcre2_substitute.c", + "src/pcre2_substring.c", + 
"src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + "src/pcre2_xclass.c", + }, + }); + + lib.installHeader(pcre2_header, "pcre2.h"); + b.installArtifact(lib); + + + // pcre2test + + const pcre2test = b.addExecutable(.{ + .name = "pcre2test", + .target = target, + .optimize = optimize, + }); + + + // pcre2-posix.so + + if (codeUnitWidth == CodeUnitWidth.@"8") { + const posixLib = std.Build.Step.Compile.create(b, .{ + .name = "pcre2-posix", + .root_module = .{ + .target = target, + .optimize = optimize, + .link_libc = true, + }, + .kind = .lib, + .linkage = linkage, + }); + + posixLib.defineCMacro("HAVE_CONFIG_H", null); + posixLib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth)); + if (linkage == .static) { + posixLib.defineCMacro("PCRE2_STATIC", null); + } + + posixLib.addConfigHeader(config_header); + posixLib.addIncludePath(pcre2_header_dir.getDirectory()); + posixLib.addIncludePath(b.path("src")); + + posixLib.addCSourceFiles(.{ + .files = &.{ + "src/pcre2posix.c", + }, + }); + + posixLib.installHeader(b.path("src/pcre2posix.h"), "pcre2posix.h"); + b.installArtifact(posixLib); + + pcre2test.linkLibrary(posixLib); + } + + + // pcre2test (again) + + pcre2test.defineCMacro("HAVE_CONFIG_H", null); + + pcre2test.addConfigHeader(config_header); + pcre2test.addIncludePath(pcre2_header_dir.getDirectory()); + pcre2test.addIncludePath(b.path("src")); + + pcre2test.addCSourceFile(.{ + .file = b.path("src/pcre2test.c"), + }); + + pcre2test.linkLibC(); + pcre2test.linkLibrary(lib); + + b.installArtifact(pcre2test); +} diff --git a/cmake/COPYING-CMAKE-SCRIPTS b/cmake/COPYING-CMAKE-SCRIPTS index 4b41776..53b6b71 100644 --- a/cmake/COPYING-CMAKE-SCRIPTS +++ b/cmake/COPYING-CMAKE-SCRIPTS @@ -7,7 +7,7 @@ are met: 2. Redistributions in binary form must reproduce the copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -3. 
The name of the author may not be used to endorse or promote products +3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR diff --git a/cmake/FindEditline.cmake b/cmake/FindEditline.cmake index 1f0c951..38d075f 100644 --- a/cmake/FindEditline.cmake +++ b/cmake/FindEditline.cmake @@ -2,15 +2,12 @@ if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) set(EDITLINE_FOUND TRUE) -else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) - FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES - editline - edit/readline - ) - - FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit) +else() + find_path(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES editline edit/readline) + + find_library(EDITLINE_LIBRARY NAMES edit) include(FindPackageHandleStandardArgs) - FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) + find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) - MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) -endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) + mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) +endif() diff --git a/cmake/FindPackageHandleStandardArgs.cmake b/cmake/FindPackageHandleStandardArgs.cmake deleted file mode 100644 index 151d812..0000000 --- a/cmake/FindPackageHandleStandardArgs.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... ) -# This macro is intended to be used in FindXXX.cmake modules files. -# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and -# it also sets the _FOUND variable. -# The package is found if all variables listed are TRUE. 
-# Example: -# -# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR) -# -# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and -# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE. -# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, -# independent whether QUIET was used or not. -# If it is found, the location is reported using the VAR1 argument, so -# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out. -# If the second argument is DEFAULT_MSG, the message in the failure case will -# be "Could NOT find LibXml2", if you don't like this message you can specify -# your own custom failure message there. - -MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 ) - - IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - IF (${_NAME}_FIND_REQUIRED) - SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}") - ELSE (${_NAME}_FIND_REQUIRED) - SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}") - ENDIF (${_NAME}_FIND_REQUIRED) - ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - SET(_FAIL_MESSAGE "${_FAIL_MSG}") - ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - - STRING(TOUPPER ${_NAME} _NAME_UPPER) - - SET(${_NAME_UPPER}_FOUND TRUE) - IF(NOT ${_VAR1}) - SET(${_NAME_UPPER}_FOUND FALSE) - ENDIF(NOT ${_VAR1}) - - FOREACH(_CURRENT_VAR ${ARGN}) - IF(NOT ${_CURRENT_VAR}) - SET(${_NAME_UPPER}_FOUND FALSE) - ENDIF(NOT ${_CURRENT_VAR}) - ENDFOREACH(_CURRENT_VAR) - - IF (${_NAME_UPPER}_FOUND) - IF (NOT ${_NAME}_FIND_QUIETLY) - MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}") - ENDIF (NOT ${_NAME}_FIND_QUIETLY) - ELSE (${_NAME_UPPER}_FOUND) - IF (${_NAME}_FIND_REQUIRED) - MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}") - ELSE (${_NAME}_FIND_REQUIRED) - IF (NOT ${_NAME}_FIND_QUIETLY) - MESSAGE(STATUS "${_FAIL_MESSAGE}") - ENDIF (NOT ${_NAME}_FIND_QUIETLY) - ENDIF (${_NAME}_FIND_REQUIRED) - ENDIF (${_NAME_UPPER}_FOUND) -ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS) diff --git 
a/cmake/FindReadline.cmake b/cmake/FindReadline.cmake index 1d4cc55..6b65046 100644 --- a/cmake/FindReadline.cmake +++ b/cmake/FindReadline.cmake @@ -5,25 +5,23 @@ # GNU Readline library finder if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY) set(READLINE_FOUND TRUE) -else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY) - FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h - /usr/include/readline - ) - -# 2008-04-22 The next clause used to read like this: -# -# FIND_LIBRARY(READLINE_LIBRARY NAMES readline) -# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses ) -# include(FindPackageHandleStandardArgs) -# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY ) -# -# I was advised to modify it such that it will find an ncurses library if -# required, but not if one was explicitly given, that is, it allows the -# default to be overridden. PH +else() + find_path(READLINE_INCLUDE_DIR readline/readline.h /usr/include/readline) + + # 2008-04-22 The next clause used to read like this: + # + # FIND_LIBRARY(READLINE_LIBRARY NAMES readline) + # FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses ) + # include(FindPackageHandleStandardArgs) + # FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY ) + # + # I was advised to modify it such that it will find an ncurses library if + # required, but not if one was explicitly given, that is, it allows the + # default to be overridden. 
PH - FIND_LIBRARY(READLINE_LIBRARY NAMES readline) - include(FindPackageHandleStandardArgs) - FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY ) + find_library(READLINE_LIBRARY NAMES readline) + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY) - MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY) -endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY) + mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY) +endif() diff --git a/cmake/pcre2-config-version.cmake.in b/cmake/pcre2-config-version.cmake.in index dac149e..db00606 100644 --- a/cmake/pcre2-config-version.cmake.in +++ b/cmake/pcre2-config-version.cmake.in @@ -4,8 +4,7 @@ set(PACKAGE_VERSION_PATCH 0) set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0) # Check whether the requested PACKAGE_FIND_VERSION is compatible -if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR - PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR) +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR) set(PACKAGE_VERSION_COMPATIBLE FALSE) else() set(PACKAGE_VERSION_COMPATIBLE TRUE) diff --git a/cmake/pcre2-config.cmake.in b/cmake/pcre2-config.cmake.in index 84eebad..082dc19 100644 --- a/cmake/pcre2-config.cmake.in +++ b/cmake/pcre2-config.cmake.in @@ -30,33 +30,49 @@ set(PCRE2_16BIT_NAME pcre2-16) set(PCRE2_32BIT_NAME pcre2-32) set(PCRE2_POSIX_NAME pcre2-posix) find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory") -if (PCRE2_USE_STATIC_LIBS) - if (MSVC) +if(PCRE2_USE_STATIC_LIBS) + if(MSVC) set(PCRE2_8BIT_NAME pcre2-8-static) set(PCRE2_16BIT_NAME pcre2-16-static) set(PCRE2_32BIT_NAME pcre2-32-static) set(PCRE2_POSIX_NAME pcre2-posix-static) - endif () + endif() set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) -else () +else() set(PCRE2_PREFIX 
${CMAKE_SHARED_LIBRARY_PREFIX}) - if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX) + if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX) set(PCRE2_PREFIX "") - endif () + endif() set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) - if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX) + if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX) set(PCRE2_SUFFIX "-0.dll") elseif(MSVC) set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif () -endif () -find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library") -find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library") -find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library") -find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library") + endif() +endif() +find_library( + PCRE2_8BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} + DOC "8 bit PCRE2 library" +) +find_library( + PCRE2_16BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} + DOC "16 bit PCRE2 library" +) +find_library( + PCRE2_32BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} + DOC "32 bit PCRE2 library" +) +find_library( + PCRE2_POSIX_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} + DOC "8 bit POSIX PCRE2 library" +) unset(PCRE2_NON_STANDARD_LIB_PREFIX) unset(PCRE2_NON_STANDARD_LIB_SUFFIX) unset(PCRE2_8BIT_NAME) @@ -65,51 +81,55 @@ unset(PCRE2_32BIT_NAME) unset(PCRE2_POSIX_NAME) # Set 
version -if (PCRE2_INCLUDE_DIR) +if(PCRE2_INCLUDE_DIR) set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0") -endif () +endif() # Which components have been found. -if (PCRE2_8BIT_LIBRARY) +if(PCRE2_8BIT_LIBRARY) set(PCRE2_8BIT_FOUND TRUE) -endif () -if (PCRE2_16BIT_LIBRARY) +endif() +if(PCRE2_16BIT_LIBRARY) set(PCRE2_16BIT_FOUND TRUE) -endif () -if (PCRE2_32BIT_LIBRARY) +endif() +if(PCRE2_32BIT_LIBRARY) set(PCRE2_32BIT_FOUND TRUE) -endif () -if (PCRE2_POSIX_LIBRARY) +endif() +if(PCRE2_POSIX_LIBRARY) set(PCRE2_POSIX_FOUND TRUE) -endif () +endif() # Check if at least one component has been specified. list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS) -if (PCRE2_NCOMPONENTS LESS 1) +if(PCRE2_NCOMPONENTS LESS 1) message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.") -endif () +endif() unset(PCRE2_NCOMPONENTS) # When POSIX component has been specified make sure that also 8BIT component is specified. set(PCRE2_8BIT_COMPONENT FALSE) set(PCRE2_POSIX_COMPONENT FALSE) foreach(component ${PCRE2_FIND_COMPONENTS}) - if (component STREQUAL "8BIT") + if(component STREQUAL "8BIT") set(PCRE2_8BIT_COMPONENT TRUE) - elseif (component STREQUAL "POSIX") + elseif(component STREQUAL "POSIX") set(PCRE2_POSIX_COMPONENT TRUE) - endif () + endif() endforeach() -if (PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT) - message(FATAL_ERROR "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component.") +if(PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT) + message( + FATAL_ERROR + "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component." 
+ ) endif() unset(PCRE2_8BIT_COMPONENT) unset(PCRE2_POSIX_COMPONENT) include(FindPackageHandleStandardArgs) set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") -find_package_handle_standard_args(PCRE2 +find_package_handle_standard_args( + PCRE2 FOUND_VAR PCRE2_FOUND REQUIRED_VARS PCRE2_INCLUDE_DIR HANDLE_COMPONENTS @@ -118,31 +138,31 @@ find_package_handle_standard_args(PCRE2 ) set(PCRE2_LIBRARIES) -if (PCRE2_FOUND) +if(PCRE2_FOUND) foreach(component ${PCRE2_FIND_COMPONENTS}) - if (PCRE2_USE_STATIC_LIBS) + if(PCRE2_USE_STATIC_LIBS) add_library(PCRE2::${component} STATIC IMPORTED) target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC) - else () + else() add_library(PCRE2::${component} SHARED IMPORTED) - endif () - set_target_properties(PCRE2::${component} PROPERTIES - IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}" - IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}" + endif() + set_target_properties( + PCRE2::${component} + PROPERTIES + IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}" + IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}" ) - if (component STREQUAL "POSIX") - set_target_properties(PCRE2::${component} PROPERTIES - INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" - LINK_LIBRARIES "PCRE2::8BIT" + if(component STREQUAL "POSIX") + set_target_properties( + PCRE2::${component} + PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT" ) - endif () + endif() set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY}) mark_as_advanced(PCRE2_${component}_LIBRARY) endforeach() -endif () +endif() -mark_as_advanced( - PCRE2_INCLUDE_DIR -) +mark_as_advanced(PCRE2_INCLUDE_DIR) diff --git a/config-cmake.h.in b/config-cmake.h.in index 6539d77..0eff0e0 100644 --- a/config-cmake.h.in +++ b/config-cmake.h.in @@ -1,6 +1,9 @@ /* config.h for CMake builds */ +#cmakedefine HAVE_ASSERT_H 1 +#cmakedefine 
HAVE_BUILTIN_ASSUME 1 #cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1 +#cmakedefine HAVE_BUILTIN_UNREACHABLE 1 #cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1 #cmakedefine HAVE_DIRENT_H 1 #cmakedefine HAVE_SYS_STAT_H 1 @@ -17,7 +20,6 @@ #cmakedefine SUPPORT_PCRE2_8 1 #cmakedefine SUPPORT_PCRE2_16 1 #cmakedefine SUPPORT_PCRE2_32 1 -#cmakedefine PCRE2_DEBUG 1 #cmakedefine DISABLE_PERCENT_ZT 1 #cmakedefine SUPPORT_LIBBZ2 1 @@ -39,11 +41,11 @@ #cmakedefine HEAP_MATCH_RECURSE 1 #cmakedefine NEVER_BACKSLASH_C 1 -#define PCRE2_EXPORT @PCRE2_EXPORT@ -#define LINK_SIZE @PCRE2_LINK_SIZE@ +#define PCRE2_EXPORT @PCRE2_EXPORT@ +#define LINK_SIZE @PCRE2_LINK_SIZE@ #define HEAP_LIMIT @PCRE2_HEAP_LIMIT@ -#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ -#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@ +#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ +#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@ #define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@ #define NEWLINE_DEFAULT @NEWLINE_DEFAULT@ #define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@ diff --git a/config.guess b/config.guess index e81d3ae..7f76b62 100755 --- a/config.guess +++ b/config.guess @@ -1,14 +1,14 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2022 Free Software Foundation, Inc. # shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2021-06-03' +timestamp='2022-01-09' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -60,7 +60,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2021 Free Software Foundation, Inc. 
+Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -437,7 +437,7 @@ case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in # This test works for both compilers. if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH=x86_64 @@ -929,6 +929,9 @@ EOF i*:PW*:*) GUESS=$UNAME_MACHINE-pc-pw32 ;; + *:SerenityOS:*:*) + GUESS=$UNAME_MACHINE-pc-serenity + ;; *:Interix*:*) case $UNAME_MACHINE in x86) @@ -1522,6 +1525,9 @@ EOF i*86:rdos:*:*) GUESS=$UNAME_MACHINE-pc-rdos ;; + i*86:Fiwix:*:*) + GUESS=$UNAME_MACHINE-pc-fiwix + ;; *:AROS:*:*) GUESS=$UNAME_MACHINE-unknown-aros ;; diff --git a/config.sub b/config.sub index d74fb6d..dba16e8 100755 --- a/config.sub +++ b/config.sub @@ -1,14 +1,14 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2022 Free Software Foundation, Inc. # shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2021-08-14' +timestamp='2022-01-03' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -76,7 +76,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. 
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -1020,6 +1020,11 @@ case $cpu-$vendor in ;; # Here we normalize CPU types with a missing or matching vendor + armh-unknown | armh-alt) + cpu=armv7l + vendor=alt + basic_os=${basic_os:-linux-gnueabihf} + ;; dpx20-unknown | dpx20-bull) cpu=rs6000 vendor=bull @@ -1121,7 +1126,7 @@ case $cpu-$vendor in xscale-* | xscalee[bl]-*) cpu=`echo "$cpu" | sed 's/^xscale/arm/'` ;; - arm64-*) + arm64-* | aarch64le-*) cpu=aarch64 ;; @@ -1304,7 +1309,7 @@ esac if test x$basic_os != x then -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os. case $basic_os in gnu/linux*) @@ -1748,7 +1753,8 @@ case $os in | skyos* | haiku* | rdos* | toppers* | drops* | es* \ | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ - | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr*) + | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \ + | fiwix* ) ;; # This one is extra strict with allowed versions sco3.2v2 | sco3.2v[4-9]* | sco5v6*) diff --git a/configure b/configure index fa7e664..90d2ee8 100755 --- a/configure +++ b/configure @@ -1,9 +1,9 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.72 for PCRE2 10.44. +# Generated by GNU Autoconf 2.71 for PCRE2 10.45-RC1. # # -# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, +# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, # Inc. # # @@ -15,6 +15,7 @@ # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh +as_nop=: if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh @@ -23,13 +24,12 @@ then : # is contrary to our usage. Disable this feature. 
alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else case e in #( - e) case `(set -o) 2>/dev/null` in #( +else $as_nop + case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; -esac ;; esac fi @@ -101,7 +101,7 @@ IFS=$as_save_IFS ;; esac -# We did not find ourselves, most probably we were run as 'sh COMMAND' +# We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 @@ -131,14 +131,15 @@ case $- in # (((( esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed 'exec'. +# out after a failed `exec'. printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 + as_bourne_compatible="as_nop=: +if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh NULLCMD=: @@ -146,13 +147,12 @@ then : # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST -else case e in #( - e) case \`(set -o) 2>/dev/null\` in #( +else \$as_nop + case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; -esac ;; esac fi " @@ -170,9 +170,8 @@ as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ) then : -else case e in #( - e) exitcode=1; echo positional parameters were not saved. ;; -esac +else \$as_nop + exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1 blah=\$(echo \$(echo blah)) @@ -194,15 +193,14 @@ test \$(( 1 + 1 )) = 2 || exit 1" if (eval "$as_required") 2>/dev/null then : as_have_required=yes -else case e in #( - e) as_have_required=no ;; -esac +else $as_nop + as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null then : -else case e in #( - e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +else $as_nop + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do @@ -235,13 +233,12 @@ IFS=$as_save_IFS if $as_found then : -else case e in #( - e) if { test -f "$SHELL" || test -f "$SHELL.exe"; } && +else $as_nop + if { test -f "$SHELL" || test -f "$SHELL.exe"; } && as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null then : CONFIG_SHELL=$SHELL as_have_required=yes -fi ;; -esac +fi fi @@ -263,7 +260,7 @@ case $- in # (((( esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail -# out after a failed 'exec'. +# out after a failed `exec'. printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi @@ -282,8 +279,7 @@ $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." fi exit 1 -fi ;; -esac +fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} @@ -322,6 +318,14 @@ as_fn_exit () as_fn_set_status $1 exit $1 } # as_fn_exit +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_mkdir_p # ------------- @@ -390,12 +394,11 @@ then : { eval $1+=\$2 }' -else case e in #( - e) as_fn_append () +else $as_nop + as_fn_append () { eval $1=\$$1\$2 - } ;; -esac + } fi # as_fn_append # as_fn_arith ARG... @@ -409,14 +412,21 @@ then : { as_val=$(( $* )) }' -else case e in #( - e) as_fn_arith () +else $as_nop + as_fn_arith () { as_val=`expr "$@" || test $? 
-eq 1` - } ;; -esac + } fi # as_fn_arith +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- @@ -490,8 +500,6 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits /[$]LINENO/= ' <$as_myself | sed ' - t clear - :clear s/[$]LINENO.*/&-/ t lineno b @@ -540,6 +548,7 @@ esac as_echo='printf %s\n' as_echo_n='printf %s' + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -551,9 +560,9 @@ if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: - # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. - # In both cases, we have to default to 'cp -pR'. + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then @@ -578,12 +587,10 @@ as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. -as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" -as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. -as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" -as_tr_sh="eval sed '$as_sed_sh'" # deprecated +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" SHELL=${CONFIG_SHELL-/bin/sh} @@ -611,8 +618,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='PCRE2' PACKAGE_TARNAME='pcre2' -PACKAGE_VERSION='10.44' -PACKAGE_STRING='PCRE2 10.44' +PACKAGE_VERSION='10.45-RC1' +PACKAGE_STRING='PCRE2 10.45-RC1' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -649,7 +656,6 @@ ac_includes_default="\ #endif" ac_header_c_list= -enable_year2038=no ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS @@ -711,7 +717,6 @@ PCRE2_PRERELEASE PCRE2_MINOR PCRE2_MAJOR HAVE_VISIBILITY -VISIBILITY_CXXFLAGS VISIBILITY_CFLAGS LT_SYS_LIBRARY_PATH OTOOL64 @@ -888,7 +893,6 @@ enable_fuzz_support enable_diff_fuzz_support enable_stack_for_recursion enable_percent_zt -enable_year2038 ' ac_precious_vars='build_alias host_alias @@ -1014,7 +1018,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: '$ac_useropt'" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1040,7 +1044,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: '$ac_useropt'" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1253,7 +1257,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: '$ac_useropt'" + as_fn_error $? 
"invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1269,7 +1273,7 @@ do ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: '$ac_useropt'" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in @@ -1299,8 +1303,8 @@ do | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; - -*) as_fn_error $? "unrecognized option: '$ac_option' -Try '$0 --help' for more information" + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" ;; *=*) @@ -1308,7 +1312,7 @@ Try '$0 --help' for more information" # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) - as_fn_error $? "invalid variable name: '$ac_envvar'" ;; + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; @@ -1358,7 +1362,7 @@ do as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done -# There might be people who depend on the old broken behavior: '$host' +# There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias @@ -1426,7 +1430,7 @@ if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" fi -ac_msg="sources are in $srcdir, but 'cd $srcdir' does not work" +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? 
"$ac_msg" pwd)` @@ -1454,7 +1458,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -'configure' configures PCRE2 10.44 to adapt to many kinds of systems. +\`configure' configures PCRE2 10.45-RC1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1468,11 +1472,11 @@ Configuration: --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit - -q, --quiet, --silent do not print 'checking ...' messages + -q, --quiet, --silent do not print \`checking ...' messages --cache-file=FILE cache test results in FILE [disabled] - -C, --config-cache alias for '--cache-file=config.cache' + -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files - --srcdir=DIR find the sources in DIR [configure dir or '..'] + --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX @@ -1480,10 +1484,10 @@ Installation directories: --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] -By default, 'make install' will install all the files in -'$ac_default_prefix/bin', '$ac_default_prefix/lib' etc. You can specify -an installation prefix other than '$ac_default_prefix' using '--prefix', -for instance '--prefix=\$HOME'. +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. For better control, use the options below. 
@@ -1525,7 +1529,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PCRE2 10.44:";; + short | recursive ) echo "Configuration of PCRE2 10.45-RC1:";; esac cat <<\_ACEOF @@ -1573,7 +1577,7 @@ Optional Features: --enable-never-backslash-C use of \C causes an error --enable-ebcdic assume EBCDIC coding rather than ASCII; incompatible - with --enable-utf; use only in (uncommon) EBCDIC + with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables --enable-ebcdic-nl25 set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic @@ -1590,7 +1594,6 @@ Optional Features: --enable-diff-fuzz-support enable differential fuzzer support --disable-percent-zt disable the use of z and t formatting modifiers - --enable-year2038 support timestamps after 2038 Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1644,7 +1647,7 @@ Some influential environment variables: LCOV the ltp lcov program GENHTML the ltp genhtml program -Use these variables to override the choices made by 'configure' or to help +Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to the package provider. @@ -1711,10 +1714,10 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PCRE2 configure 10.44 -generated by GNU Autoconf 2.72 +PCRE2 configure 10.45-RC1 +generated by GNU Autoconf 2.71 -Copyright (C) 2023 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. 
_ACEOF @@ -1753,12 +1756,11 @@ printf "%s\n" "$ac_try_echo"; } >&5 } && test -s conftest.$ac_objext then : ac_retval=0 -else case e in #( - e) printf "%s\n" "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 ;; -esac + ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval @@ -1777,8 +1779,8 @@ printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> @@ -1786,12 +1788,10 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : eval "$3=yes" -else case e in #( - e) eval "$3=no" ;; -esac +else $as_nop + eval "$3=no" fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -1812,8 +1812,8 @@ printf %s "checking for int$2_t... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 -else case e in #( - e) eval "$3=no" +else $as_nop + eval "$3=no" # Order is important - never check a type that is potentially smaller # than half of the expected target width. 
for ac_type in int$2_t 'int' 'long int' \ @@ -1854,13 +1854,12 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : -else case e in #( - e) case $ac_type in #( +else $as_nop + case $ac_type in #( int$2_t) : eval "$3=yes" ;; #( *) : eval "$3=\$ac_type" ;; -esac ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext @@ -1869,12 +1868,10 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext if eval test \"x\$"$3"\" = x"no" then : -else case e in #( - e) break ;; -esac +else $as_nop + break fi - done ;; -esac + done fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -1914,12 +1911,11 @@ printf "%s\n" "$ac_try_echo"; } >&5 } then : ac_retval=0 -else case e in #( - e) printf "%s\n" "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 ;; -esac + ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would @@ -1942,15 +1938,15 @@ printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Define $2 to an innocuous variant, in case declares $2. For example, HP-UX 11i declares gettimeofday. */ #define $2 innocuous_$2 /* System header to define __stub macros and hopefully few prototypes, - which can conflict with char $2 (void); below. */ + which can conflict with char $2 (); below. */ #include #undef $2 @@ -1961,7 +1957,7 @@ else case e in #( #ifdef __cplusplus extern "C" #endif -char $2 (void); +char $2 (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. 
*/ @@ -1980,13 +1976,11 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : eval "$3=yes" -else case e in #( - e) eval "$3=no" ;; -esac +else $as_nop + eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext ;; -esac + conftest$ac_exeext conftest.$ac_ext fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -2007,8 +2001,8 @@ printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 -else case e in #( - e) eval "$3=no" +else $as_nop + eval "$3=no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 @@ -2038,14 +2032,12 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : -else case e in #( - e) eval "$3=yes" ;; -esac +else $as_nop + eval "$3=yes" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 @@ -2077,8 +2069,8 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PCRE2 $as_me 10.44, which was -generated by GNU Autoconf 2.72. Invocation command line was +It was created by PCRE2 $as_me 10.45-RC1, which was +generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -2324,10 +2316,10 @@ esac printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ - || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? 
"failed to load site script $ac_site_file -See 'config.log' for more details" "$LINENO" 5; } +See \`config.log' for more details" "$LINENO" 5; } fi done @@ -2363,7 +2355,9 @@ struct stat; /* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ struct buf { int x; }; struct buf * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (char **p, int i) +static char *e (p, i) + char **p; + int i; { return p[i]; } @@ -2377,21 +2371,6 @@ static char *f (char * (*g) (char **, int), char **p, ...) return s; } -/* C89 style stringification. */ -#define noexpand_stringify(a) #a -const char *stringified = noexpand_stringify(arbitrary+token=sequence); - -/* C89 style token pasting. Exercises some of the corner cases that - e.g. old MSVC gets wrong, but not very hard. */ -#define noexpand_concat(a,b) a##b -#define expand_concat(a,b) noexpand_concat(a,b) -extern int vA; -extern int vbee; -#define aye A -#define bee B -int *pvA = &expand_concat(v,aye); -int *pvbee = &noexpand_concat(v,bee); - /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not \xHH hex character constants. These do not provoke an error unfortunately, instead are silently treated @@ -2419,19 +2398,16 @@ ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); # Test code for whether the C compiler supports C99 (global declarations) ac_c_conftest_c99_globals=' -/* Does the compiler advertise C99 conformance? */ +// Does the compiler advertise C99 conformance? #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L # error "Compiler does not advertise C99 conformance" #endif -// See if C++-style comments work. - #include extern int puts (const char *); extern int printf (const char *, ...); extern int dprintf (int, const char *, ...); extern void *malloc (size_t); -extern void free (void *); // Check varargs macros. These examples are taken from C99 6.10.3.5. 
// dprintf is used instead of fprintf to avoid needing to declare @@ -2481,6 +2457,7 @@ typedef const char *ccp; static inline int test_restrict (ccp restrict text) { + // See if C++-style comments work. // Iterate through items via the restricted pointer. // Also check for declarations in for loops. for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) @@ -2546,8 +2523,6 @@ ac_c_conftest_c99_main=' ia->datasize = 10; for (int i = 0; i < ia->datasize; ++i) ia->data[i] = i * 1.234; - // Work around memory leak warnings. - free (ia); // Check named initializers. struct named_init ni = { @@ -2569,7 +2544,7 @@ ac_c_conftest_c99_main=' # Test code for whether the C compiler supports C11 (global declarations) ac_c_conftest_c11_globals=' -/* Does the compiler advertise C11 conformance? */ +// Does the compiler advertise C11 conformance? #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L # error "Compiler does not advertise C11 conformance" #endif @@ -2763,9 +2738,8 @@ IFS=$as_save_IFS if $as_found then : -else case e in #( - e) as_fn_error $? "cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 ;; -esac +else $as_nop + as_fn_error $? 
"cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 fi @@ -2793,12 +2767,12 @@ for ac_var in $ac_precious_vars; do eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&5 -printf "%s\n" "$as_me: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was not set in the previous run" >&5 -printf "%s\n" "$as_me: error: '$ac_var' was not set in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) @@ -2807,18 +2781,18 @@ printf "%s\n" "$as_me: error: '$ac_var' was not set in the previous run" >&2;} ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' has changed since the previous run:" >&5 -printf "%s\n" "$as_me: error: '$ac_var' has changed since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&5 -printf "%s\n" "$as_me: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace 
changes in \`$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: '$ac_old_val'" >&5 -printf "%s\n" "$as_me: former value: '$ac_old_val'" >&2;} - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: '$ac_new_val'" >&5 -printf "%s\n" "$as_me: current value: '$ac_new_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. @@ -2834,11 +2808,11 @@ printf "%s\n" "$as_me: current value: '$ac_new_val'" >&2;} fi done if $ac_cache_corrupted; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run '${MAKE-make} distclean' and/or 'rm $cache_file' + as_fn_error $? 
"run \`${MAKE-make} distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## @@ -2877,8 +2851,8 @@ if test -z "$INSTALL"; then if test ${ac_cv_path_install+y} then : printf %s "(cached) " >&6 -else case e in #( - e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +else $as_nop + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS @@ -2932,8 +2906,7 @@ esac IFS=$as_save_IFS rm -rf conftest.one conftest.two conftest.dir - ;; -esac + fi if test ${ac_cv_path_install+y}; then INSTALL=$ac_cv_path_install @@ -3029,7 +3002,7 @@ test "$program_prefix" != NONE && test "$program_suffix" != NONE && program_transform_name="s&\$&$program_suffix&;$program_transform_name" # Double any \ or $. -# By default was 's,x,x', remove it if useless. +# By default was `s,x,x', remove it if useless. ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`printf "%s\n" "$program_transform_name" | sed "$ac_script"` @@ -3072,8 +3045,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_STRIP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$STRIP"; then +else $as_nop + if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3095,8 +3068,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then @@ -3118,8 +3090,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_STRIP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_STRIP"; then +else $as_nop + if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3141,8 +3113,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then @@ -3178,8 +3149,8 @@ if test -z "$MKDIR_P"; then if test ${ac_cv_path_mkdir+y} then : printf %s "(cached) " >&6 -else case e in #( - e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +else $as_nop + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin do IFS=$as_save_IFS @@ -3193,7 +3164,7 @@ do as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #( 'mkdir ('*'coreutils) '* | \ - *'BusyBox '* | \ + 'BusyBox '* | \ 'mkdir (fileutils) '4.1*) ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext break 3;; @@ -3202,17 +3173,18 @@ do done done IFS=$as_save_IFS - ;; -esac + fi test -d ./--version && rmdir ./--version if test ${ac_cv_path_mkdir+y}; then MKDIR_P="$ac_cv_path_mkdir -p" else - # As a last resort, use plain mkdir -p, - # in the hope it doesn't have the bugs of ancient mkdir. - MKDIR_P='mkdir -p' + # As a last resort, use the slow shell script. Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 @@ -3227,8 +3199,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AWK+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$AWK"; then +else $as_nop + if test -n "$AWK"; then ac_cv_prog_AWK="$AWK" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3250,8 +3222,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi AWK=$ac_cv_prog_AWK if test -n "$AWK"; then @@ -3273,8 +3244,8 @@ ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` if eval test \${ac_cv_prog_make_${ac_make}_set+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat >conftest.make <<\_ACEOF +else $as_nop + cat >conftest.make <<\_ACEOF SHELL = /bin/sh all: @echo '@@@%%%=$(MAKE)=@@@%%%' @@ -3286,8 +3257,7 @@ case `${MAKE-make} -f conftest.make 2>/dev/null` in *) eval ac_cv_prog_make_${ac_make}_set=no;; esac -rm -f conftest.make ;; -esac +rm -f conftest.make fi if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 @@ -3325,8 +3295,8 @@ printf %s "checking whether $am_make supports nested variables... " >&6; } if test ${am_cv_make_support_nested_variables+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if printf "%s\n" 'TRUE=$(BAR$(V)) +else $as_nop + if printf "%s\n" 'TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 @@ -3336,8 +3306,7 @@ am__doit: am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no -fi ;; -esac +fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } @@ -3372,7 +3341,7 @@ fi # Define the identity of the package. PACKAGE='pcre2' - VERSION='10.44' + VERSION='10.45-RC1' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -3492,8 +3461,8 @@ printf %s "checking whether $am_make supports nested variables... 
" >&6; } if test ${am_cv_make_support_nested_variables+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if printf "%s\n" 'TRUE=$(BAR$(V)) +else $as_nop + if printf "%s\n" 'TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 @@ -3503,8 +3472,7 @@ am__doit: am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no -fi ;; -esac +fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } @@ -3555,8 +3523,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then +else $as_nop + if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3578,8 +3546,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then @@ -3601,8 +3568,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_CC"; then +else $as_nop + if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3624,8 +3591,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then @@ -3660,8 +3626,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then +else $as_nop + if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3683,8 +3649,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then @@ -3706,8 +3671,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then +else $as_nop + if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no @@ -3746,8 +3711,7 @@ if test $ac_prog_rejected = yes; then ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" fi fi -fi ;; -esac +fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then @@ -3771,8 +3735,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then +else $as_nop + if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3794,8 +3758,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then @@ -3821,8 +3784,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_CC"; then +else $as_nop + if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3844,8 +3807,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then @@ -3883,8 +3845,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$CC"; then +else $as_nop + if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3906,8 +3868,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then @@ -3929,8 +3890,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_CC"; then +else $as_nop + if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -3952,8 +3913,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then @@ -3982,10 +3942,10 @@ fi fi -test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH -See 'config.log' for more details" "$LINENO" 5; } +See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 @@ -4057,8 +4017,8 @@ printf "%s\n" "$ac_try_echo"; } >&5 printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then : - # Autoconf-2.13 could set the ac_cv_exeext variable to 'no'. -# So ignore a value of 'no', otherwise this would lead to 'EXEEXT = no' + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. @@ -4078,7 +4038,7 @@ do ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an '-o' + # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. 
# Even if this section looks crufty: it has the advantage of # actually working. @@ -4089,9 +4049,8 @@ do done test "$ac_cv_exeext" = no && ac_cv_exeext= -else case e in #( - e) ac_file='' ;; -esac +else $as_nop + ac_file='' fi if test -z "$ac_file" then : @@ -4100,14 +4059,13 @@ printf "%s\n" "no" >&6; } printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables -See 'config.log' for more details" "$LINENO" 5; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } ;; -esac +See \`config.log' for more details" "$LINENO" 5; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 printf %s "checking for C compiler default output file name... " >&6; } @@ -4131,10 +4089,10 @@ printf "%s\n" "$ac_try_echo"; } >&5 printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then : - # If both 'conftest.exe' and 'conftest' are 'present' (well, observable) -# catch 'conftest.exe'. For instance with Cygwin, 'ls conftest' will -# work properly (i.e., refer to 'conftest.exe'), while it won't with -# 'rm'. + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. 
for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in @@ -4144,12 +4102,11 @@ for ac_file in conftest.exe conftest conftest.*; do * ) break;; esac done -else case e in #( - e) { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link -See 'config.log' for more details" "$LINENO" 5; } ;; -esac +See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 @@ -4165,8 +4122,6 @@ int main (void) { FILE *f = fopen ("conftest.out", "w"); - if (!f) - return 1; return ferror (f) || fclose (f) != 0; ; @@ -4206,27 +4161,26 @@ printf "%s\n" "$ac_try_echo"; } >&5 if test "$cross_compiling" = maybe; then cross_compiling=yes else - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "cannot run C compiled programs. -If you meant to cross compile, use '--host'. -See 'config.log' for more details" "$LINENO" 5; } +If you meant to cross compile, use \`--host'. 
+See \`config.log' for more details" "$LINENO" 5; } fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 printf "%s\n" "$cross_compiling" >&6; } -rm -f conftest.$ac_ext conftest$ac_cv_exeext \ - conftest.o conftest.obj conftest.out +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 printf %s "checking for suffix of object files... " >&6; } if test ${ac_cv_objext+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int @@ -4258,18 +4212,16 @@ then : break;; esac done -else case e in #( - e) printf "%s\n" "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile -See 'config.log' for more details" "$LINENO" 5; } ;; -esac +See \`config.log' for more details" "$LINENO" 5; } fi -rm -f conftest.$ac_cv_objext conftest.$ac_ext ;; -esac +rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 printf "%s\n" "$ac_cv_objext" >&6; } @@ -4280,8 +4232,8 @@ printf %s "checking whether the compiler supports GNU C... " >&6; } if test ${ac_cv_c_compiler_gnu+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int @@ -4298,14 +4250,12 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_compiler_gnu=yes -else case e in #( - e) ac_compiler_gnu=no ;; -esac +else $as_nop + ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } @@ -4323,8 +4273,8 @@ printf %s "checking whether $CC accepts -g... " >&6; } if test ${ac_cv_prog_cc_g+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_save_c_werror_flag=$ac_c_werror_flag +else $as_nop + ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" @@ -4342,8 +4292,8 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes -else case e in #( - e) CFLAGS="" +else $as_nop + CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -4358,8 +4308,8 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : -else case e in #( - e) ac_c_werror_flag=$ac_save_c_werror_flag +else $as_nop + ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4376,15 +4326,12 @@ if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag ;; -esac + ac_c_werror_flag=$ac_save_c_werror_flag fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 printf "%s\n" "$ac_cv_prog_cc_g" >&6; } @@ -4411,8 +4358,8 @@ printf %s "checking for $CC option to enable C11 features... " >&6; } if test ${ac_cv_prog_cc_c11+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_cv_prog_cc_c11=no +else $as_nop + ac_cv_prog_cc_c11=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4429,28 +4376,25 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c11" != "xno" && break done rm -f conftest.$ac_ext -CC=$ac_save_CC ;; -esac +CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c11" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else case e in #( - e) if test "x$ac_cv_prog_cc_c11" = x +else $as_nop + if test "x$ac_cv_prog_cc_c11" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } - CC="$CC $ac_cv_prog_cc_c11" ;; -esac + CC="$CC $ac_cv_prog_cc_c11" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 - ac_prog_cc_stdc=c11 ;; -esac + ac_prog_cc_stdc=c11 fi fi if test x$ac_prog_cc_stdc = xno @@ -4460,8 +4404,8 @@ printf %s "checking for $CC option to enable C99 features... " >&6; } if test ${ac_cv_prog_cc_c99+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_cv_prog_cc_c99=no +else $as_nop + ac_cv_prog_cc_c99=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4478,28 +4422,25 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c99" != "xno" && break done rm -f conftest.$ac_ext -CC=$ac_save_CC ;; -esac +CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c99" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else case e in #( - e) if test "x$ac_cv_prog_cc_c99" = x +else $as_nop + if test "x$ac_cv_prog_cc_c99" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } - CC="$CC $ac_cv_prog_cc_c99" ;; -esac + CC="$CC $ac_cv_prog_cc_c99" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 - ac_prog_cc_stdc=c99 ;; -esac + ac_prog_cc_stdc=c99 fi fi if test x$ac_prog_cc_stdc = xno @@ -4509,8 +4450,8 @@ printf %s "checking for $CC option to enable C89 features... " >&6; } if test ${ac_cv_prog_cc_c89+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_cv_prog_cc_c89=no +else $as_nop + ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -4527,28 +4468,25 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext -CC=$ac_save_CC ;; -esac +CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c89" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } -else case e in #( - e) if test "x$ac_cv_prog_cc_c89" = x +else $as_nop + if test "x$ac_cv_prog_cc_c89" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } - CC="$CC $ac_cv_prog_cc_c89" ;; -esac + CC="$CC $ac_cv_prog_cc_c89" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 - ac_prog_cc_stdc=c89 ;; -esac + ac_prog_cc_stdc=c89 fi fi @@ -4569,8 +4507,8 @@ printf %s "checking whether $CC understands -c and -o together... " >&6; } if test ${am_cv_prog_cc_c_o+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int @@ -4600,8 +4538,7 @@ _ACEOF fi done rm -f core conftest* - unset am_i ;; -esac + unset am_i fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } @@ -4691,8 +4628,8 @@ printf %s "checking dependency style of $depcc... " >&6; } if test ${am_cv_CC_dependencies_compiler_type+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then +else $as_nop + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. 
For # instance it was reported that on HP-UX the gcc test will end up @@ -4796,8 +4733,7 @@ else case e in #( else am_cv_CC_dependencies_compiler_type=none fi - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } @@ -4856,8 +4792,8 @@ printf %s "checking whether it is safe to define __EXTENSIONS__... " >&6; } if test ${ac_cv_safe_to_define___extensions__+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ # define __EXTENSIONS__ 1 @@ -4873,12 +4809,10 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_safe_to_define___extensions__=yes -else case e in #( - e) ac_cv_safe_to_define___extensions__=no ;; -esac +else $as_nop + ac_cv_safe_to_define___extensions__=no fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_safe_to_define___extensions__" >&5 printf "%s\n" "$ac_cv_safe_to_define___extensions__" >&6; } @@ -4888,8 +4822,8 @@ printf %s "checking whether _XOPEN_SOURCE should be defined... " >&6; } if test ${ac_cv_should_define__xopen_source+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_cv_should_define__xopen_source=no +else $as_nop + ac_cv_should_define__xopen_source=no if test $ac_cv_header_wchar_h = yes then : cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -4908,8 +4842,8 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #define _XOPEN_SOURCE 500 @@ -4927,12 +4861,10 @@ if ac_fn_c_try_compile "$LINENO" then : ac_cv_should_define__xopen_source=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -fi ;; -esac +fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_should_define__xopen_source" >&5 printf "%s\n" "$ac_cv_should_define__xopen_source" >&6; } @@ -4957,8 +4889,6 @@ printf "%s\n" "$ac_cv_should_define__xopen_source" >&6; } printf "%s\n" "#define __STDC_WANT_IEC_60559_DFP_EXT__ 1" >>confdefs.h - printf "%s\n" "#define __STDC_WANT_IEC_60559_EXT__ 1" >>confdefs.h - printf "%s\n" "#define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1" >>confdefs.h printf "%s\n" "#define __STDC_WANT_IEC_60559_TYPES_EXT__ 1" >>confdefs.h @@ -4978,9 +4908,8 @@ then : printf "%s\n" "#define _POSIX_1_SOURCE 2" >>confdefs.h -else case e in #( - e) MINIX= ;; -esac +else $as_nop + MINIX= fi if test $ac_cv_safe_to_define___extensions__ = yes then : @@ -5017,8 +4946,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AR+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$AR"; then +else $as_nop + if test -n "$AR"; then ac_cv_prog_AR="$AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5040,8 +4969,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi AR=$ac_cv_prog_AR if test -n "$AR"; then @@ -5067,8 +4995,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_AR+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_AR"; then +else $as_nop + if test -n "$ac_ct_AR"; then ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5090,8 +5018,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_AR=$ac_cv_prog_ac_ct_AR if test -n "$ac_ct_AR"; then @@ -5126,8 +5053,8 @@ printf %s "checking the archiver ($AR) interface... " >&6; } if test ${am_cv_ar_interface+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_ext=c +else $as_nop + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' @@ -5170,8 +5097,7 @@ ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5 printf "%s\n" "$am_cv_ar_interface" >&6; } @@ -5214,8 +5140,8 @@ esac -macro_version='2.5.0.1-38c1-dirty' -macro_revision='2.5.0.1' +macro_version='2.4.7' +macro_revision='2.4.7' @@ -5243,16 +5169,15 @@ printf %s "checking build system type... " >&6; } if test ${ac_cv_build+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_build_alias=$build_alias +else $as_nop + ac_build_alias=$build_alias test "x$ac_build_alias" = x && ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` test "x$ac_build_alias" = x && as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 printf "%s\n" "$ac_cv_build" >&6; } @@ -5279,15 +5204,14 @@ printf %s "checking host system type... 
" >&6; } if test ${ac_cv_host+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test "x$host_alias" = x; then +else $as_nop + if test "x$host_alias" = x; then ac_cv_host=$ac_cv_build else ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 fi - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 printf "%s\n" "$ac_cv_host" >&6; } @@ -5383,8 +5307,8 @@ printf %s "checking for a sed that does not truncate output... " >&6; } if test ${ac_cv_path_SED+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ +else $as_nop + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ for ac_i in 1 2 3 4 5 6 7; do ac_script="$ac_script$as_nl$ac_script" done @@ -5409,10 +5333,9 @@ do as_fn_executable_p "$ac_path_SED" || continue # Check for GNU ac_path_SED and select it if it is found. # Check for GNU $ac_path_SED -case `"$ac_path_SED" --version 2>&1` in #( +case `"$ac_path_SED" --version 2>&1` in *GNU*) ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; -#( *) ac_count=0 printf %s 0123456789 >"conftest.in" @@ -5447,8 +5370,7 @@ IFS=$as_save_IFS else ac_cv_path_SED=$SED fi - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 printf "%s\n" "$ac_cv_path_SED" >&6; } @@ -5473,8 +5395,8 @@ printf %s "checking for grep that handles long lines and -e... " >&6; } if test ${ac_cv_path_GREP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -z "$GREP"; then +else $as_nop + if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5493,10 +5415,9 @@ do as_fn_executable_p "$ac_path_GREP" || continue # Check for GNU ac_path_GREP and select it if it is found. 
# Check for GNU $ac_path_GREP -case `"$ac_path_GREP" --version 2>&1` in #( +case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; -#( *) ac_count=0 printf %s 0123456789 >"conftest.in" @@ -5531,8 +5452,7 @@ IFS=$as_save_IFS else ac_cv_path_GREP=$GREP fi - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 printf "%s\n" "$ac_cv_path_GREP" >&6; } @@ -5544,8 +5464,8 @@ printf %s "checking for egrep... " >&6; } if test ${ac_cv_path_EGREP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 +else $as_nop + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then @@ -5567,10 +5487,9 @@ do as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP -case `"$ac_path_EGREP" --version 2>&1` in #( +case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; -#( *) ac_count=0 printf %s 0123456789 >"conftest.in" @@ -5606,23 +5525,20 @@ else ac_cv_path_EGREP=$EGREP fi - fi ;; -esac + fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 printf "%s\n" "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" - EGREP_TRADITIONAL=$EGREP - ac_cv_path_EGREP_TRADITIONAL=$EGREP { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 printf %s "checking for fgrep... " >&6; } if test ${ac_cv_path_FGREP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 +else $as_nop + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 then ac_cv_path_FGREP="$GREP -F" else if test -z "$FGREP"; then @@ -5644,10 +5560,9 @@ do as_fn_executable_p "$ac_path_FGREP" || continue # Check for GNU ac_path_FGREP and select it if it is found. 
# Check for GNU $ac_path_FGREP -case `"$ac_path_FGREP" --version 2>&1` in #( +case `"$ac_path_FGREP" --version 2>&1` in *GNU*) ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; -#( *) ac_count=0 printf %s 0123456789 >"conftest.in" @@ -5683,8 +5598,7 @@ else ac_cv_path_FGREP=$FGREP fi - fi ;; -esac + fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 printf "%s\n" "$ac_cv_path_FGREP" >&6; } @@ -5715,9 +5629,8 @@ test -z "$GREP" && GREP=grep if test ${with_gnu_ld+y} then : withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes -else case e in #( - e) with_gnu_ld=no ;; -esac +else $as_nop + with_gnu_ld=no fi ac_prog=ld @@ -5726,7 +5639,7 @@ if test yes = "$GCC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 printf %s "checking for ld used by $CC... " >&6; } case $host in - *-*-mingw* | *-*-windows*) + *-*-mingw*) # gcc leaves a trailing carriage return, which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) @@ -5762,8 +5675,8 @@ fi if test ${lt_cv_path_LD+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -z "$LD"; then +else $as_nop + if test -z "$LD"; then lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS=$lt_save_ifs @@ -5786,8 +5699,7 @@ else case e in #( IFS=$lt_save_ifs else lt_cv_path_LD=$LD # Let the user override the test with a path. -fi ;; -esac +fi fi LD=$lt_cv_path_LD @@ -5804,8 +5716,8 @@ printf %s "checking if the linker ($LD) is GNU ld... " >&6; } if test ${lt_cv_prog_gnu_ld+y} then : printf %s "(cached) " >&6 -else case e in #( - e) # I'd rather use --version here, but apparently some GNU lds only accept -v. +else $as_nop + # I'd rather use --version here, but apparently some GNU lds only accept -v. case `$LD -v 2>&1 &1 &5 @@ -5833,8 +5744,8 @@ printf %s "checking for BSD- or MS-compatible name lister (nm)... 
" >&6; } if test ${lt_cv_path_NM+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$NM"; then +else $as_nop + if test -n "$NM"; then # Let the user override the test. lt_cv_path_NM=$NM else @@ -5855,7 +5766,7 @@ else # Tru64's nm complains that /dev/null is an invalid object file # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty case $build_os in - mingw* | windows*) lt_bad_file=conftest.nm/nofile ;; + mingw*) lt_bad_file=conftest.nm/nofile ;; *) lt_bad_file=/dev/null ;; esac case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in @@ -5881,8 +5792,7 @@ else IFS=$lt_save_ifs done : ${lt_cv_path_NM=no} -fi ;; -esac +fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 printf "%s\n" "$lt_cv_path_NM" >&6; } @@ -5903,8 +5813,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_DUMPBIN+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$DUMPBIN"; then +else $as_nop + if test -n "$DUMPBIN"; then ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5926,8 +5836,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi DUMPBIN=$ac_cv_prog_DUMPBIN if test -n "$DUMPBIN"; then @@ -5953,8 +5862,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DUMPBIN+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_DUMPBIN"; then +else $as_nop + if test -n "$ac_ct_DUMPBIN"; then ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -5976,8 +5885,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN if test -n "$ac_ct_DUMPBIN"; then @@ -6031,8 +5939,8 @@ printf %s "checking the name lister ($NM) interface... 
" >&6; } if test ${lt_cv_nm_interface+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_nm_interface="BSD nm" +else $as_nop + lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) (eval "$ac_compile" 2>conftest.err) @@ -6045,8 +5953,7 @@ else case e in #( if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" fi - rm -f conftest* ;; -esac + rm -f conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 printf "%s\n" "$lt_cv_nm_interface" >&6; } @@ -6068,8 +5975,8 @@ printf %s "checking the maximum length of command line arguments... " >&6; } if test ${lt_cv_sys_max_cmd_len+y} then : printf %s "(cached) " >&6 -else case e in #( - e) i=0 +else $as_nop + i=0 teststring=ABCD case $build_os in @@ -6088,7 +5995,7 @@ else case e in #( lt_cv_sys_max_cmd_len=-1; ;; - cygwin* | mingw* | windows* | cegcc*) + cygwin* | mingw* | cegcc*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, @@ -6110,7 +6017,7 @@ else case e in #( lt_cv_sys_max_cmd_len=8192; ;; - darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` @@ -6191,8 +6098,7 @@ else case e in #( fi ;; esac - ;; -esac + fi if test -n "$lt_cv_sys_max_cmd_len"; then @@ -6249,11 +6155,11 @@ printf %s "checking how to convert $build file names to $host format... 
" >&6; } if test ${lt_cv_to_host_file_cmd+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $host in +else $as_nop + case $host in *-*-mingw* ) case $build in - *-*-mingw* | *-*-windows* ) # actually msys + *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) @@ -6266,7 +6172,7 @@ else case e in #( ;; *-*-cygwin* ) case $build in - *-*-mingw* | *-*-windows* ) # actually msys + *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) @@ -6281,8 +6187,7 @@ else case e in #( lt_cv_to_host_file_cmd=func_convert_file_noop ;; esac - ;; -esac + fi to_host_file_cmd=$lt_cv_to_host_file_cmd @@ -6298,20 +6203,19 @@ printf %s "checking how to convert $build file names to toolchain format... " >& if test ${lt_cv_to_tool_file_cmd+y} then : printf %s "(cached) " >&6 -else case e in #( - e) #assume ordinary cross tools, or native build. +else $as_nop + #assume ordinary cross tools, or native build. lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in - *-*-mingw* | *-*-windows* ) + *-*-mingw* ) case $build in - *-*-mingw* | *-*-windows* ) # actually msys + *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac ;; esac - ;; -esac + fi to_tool_file_cmd=$lt_cv_to_tool_file_cmd @@ -6327,9 +6231,8 @@ printf %s "checking for $LD option to reload object files... 
" >&6; } if test ${lt_cv_ld_reload_flag+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_ld_reload_flag='-r' ;; -esac +else $as_nop + lt_cv_ld_reload_flag='-r' fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 printf "%s\n" "$lt_cv_ld_reload_flag" >&6; } @@ -6340,7 +6243,7 @@ case $reload_flag in esac reload_cmds='$LD$reload_flag -o $output$reload_objs' case $host_os in - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) if test yes != "$GCC"; then reload_cmds=false fi @@ -6362,15 +6265,16 @@ esac -# Extract the first word of "file", so it can be a program name with args. -set dummy file; ac_word=$2 +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}file", so it can be a program name with args. +set dummy ${ac_tool_prefix}file; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_FILECMD+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$FILECMD"; then +else $as_nop + if test -n "$FILECMD"; then ac_cv_prog_FILECMD="$FILECMD" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6384,7 +6288,7 @@ do esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_FILECMD=":" + ac_cv_prog_FILECMD="${ac_tool_prefix}file" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi @@ -6392,8 +6296,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi FILECMD=$ac_cv_prog_FILECMD if test -n "$FILECMD"; then @@ -6405,6 +6308,65 @@ printf "%s\n" "no" >&6; } fi +fi +if test -z "$ac_cv_prog_FILECMD"; then + ac_ct_FILECMD=$FILECMD + # Extract the first word of "file", so it can be a program name with args. 
+set dummy file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_FILECMD"; then + ac_cv_prog_ac_ct_FILECMD="$ac_ct_FILECMD" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_FILECMD="file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_FILECMD=$ac_cv_prog_ac_ct_FILECMD +if test -n "$ac_ct_FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FILECMD" >&5 +printf "%s\n" "$ac_ct_FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_FILECMD" = x; then + FILECMD=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + FILECMD=$ac_ct_FILECMD + fi +else + FILECMD="$ac_cv_prog_FILECMD" +fi + @@ -6419,8 +6381,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_OBJDUMP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$OBJDUMP"; then +else $as_nop + if test -n "$OBJDUMP"; then ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6442,8 +6404,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi OBJDUMP=$ac_cv_prog_OBJDUMP if test -n "$OBJDUMP"; then @@ -6465,8 +6426,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OBJDUMP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_OBJDUMP"; then +else $as_nop + if test -n "$ac_ct_OBJDUMP"; then ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6488,8 +6449,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP if test -n "$ac_ct_OBJDUMP"; then @@ -6527,8 +6487,8 @@ printf %s "checking how to recognize dependent libraries... " >&6; } if test ${lt_cv_deplibs_check_method+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_file_magic_cmd='$MAGIC_CMD' +else $as_nop + lt_cv_file_magic_cmd='$MAGIC_CMD' lt_cv_file_magic_test_file= lt_cv_deplibs_check_method='unknown' # Need to set the preceding variable on all platforms that support @@ -6536,6 +6496,7 @@ lt_cv_deplibs_check_method='unknown' # 'none' -- dependencies not supported. # 'unknown' -- same as none, but documents that we really don't know. # 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. # 'file_magic [[regex]]' -- check by looking for files in library path # that responds to the $file_magic_cmd with a given extended regex. # If you have 'file' or equivalent on your system and you're not sure @@ -6562,7 +6523,7 @@ cygwin*) lt_cv_file_magic_cmd='func_win32_libid' ;; -mingw* | windows* | pw32*) +mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by # func_win32_libid shell function, so use a weaker test based on 'objdump', # unless we find 'file', for example because we are cross-compiling. 
@@ -6644,7 +6605,7 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) lt_cv_deplibs_check_method=pass_all ;; -netbsd*) +netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' else @@ -6662,7 +6623,7 @@ newos6*) lt_cv_deplibs_check_method=pass_all ;; -openbsd*) +openbsd* | bitrig*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' else @@ -6720,8 +6681,7 @@ os2*) lt_cv_deplibs_check_method=pass_all ;; esac - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 printf "%s\n" "$lt_cv_deplibs_check_method" >&6; } @@ -6730,7 +6690,7 @@ file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in - mingw* | windows* | pw32*) + mingw* | pw32*) if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then want_nocaseglob=yes else @@ -6773,8 +6733,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_DLLTOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$DLLTOOL"; then +else $as_nop + if test -n "$DLLTOOL"; then ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6796,8 +6756,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi DLLTOOL=$ac_cv_prog_DLLTOOL if test -n "$DLLTOOL"; then @@ -6819,8 +6778,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DLLTOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_DLLTOOL"; then +else $as_nop + if test -n "$ac_ct_DLLTOOL"; then ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6842,8 +6801,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL if test -n "$ac_ct_DLLTOOL"; then @@ -6882,11 +6840,11 @@ printf %s "checking how to associate runtime and link libraries... " >&6; } if test ${lt_cv_sharedlib_from_linklib_cmd+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_sharedlib_from_linklib_cmd='unknown' +else $as_nop + lt_cv_sharedlib_from_linklib_cmd='unknown' case $host_os in -cygwin* | mingw* | windows* | pw32* | cegcc*) +cygwin* | mingw* | pw32* | cegcc*) # two different shell functions defined in ltmain.sh; # decide which one to use based on capabilities of $DLLTOOL case `$DLLTOOL --help 2>&1` in @@ -6903,8 +6861,7 @@ cygwin* | mingw* | windows* | pw32* | cegcc*) lt_cv_sharedlib_from_linklib_cmd=$ECHO ;; esac - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 printf "%s\n" "$lt_cv_sharedlib_from_linklib_cmd" >&6; } @@ -6927,8 +6884,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AR+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$AR"; then +else $as_nop + if test -n "$AR"; then ac_cv_prog_AR="$AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -6950,8 +6907,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi AR=$ac_cv_prog_AR if test -n "$AR"; then @@ -6977,8 +6933,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_AR+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_AR"; then +else $as_nop + if test -n "$ac_ct_AR"; then ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -7000,8 +6956,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_AR=$ac_cv_prog_ac_ct_AR if test -n "$ac_ct_AR"; then @@ -7038,7 +6993,7 @@ fi # Use ARFLAGS variable as AR's operation code to sync the variable naming with # Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have -# higher priority because that's what people were doing historically (setting +# higher priority because thats what people were doing historically (setting # ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS # variable obsoleted/removed. @@ -7063,8 +7018,8 @@ printf %s "checking for archiver @FILE support... " >&6; } if test ${lt_cv_ar_at_file+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_ar_at_file=no +else $as_nop + lt_cv_ar_at_file=no cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -7101,8 +7056,7 @@ then : fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 printf "%s\n" "$lt_cv_ar_at_file" >&6; } @@ -7127,8 +7081,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_STRIP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$STRIP"; then +else $as_nop + if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -7150,8 +7104,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then @@ -7173,8 +7126,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_STRIP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_STRIP"; then +else $as_nop + if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -7196,8 +7149,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then @@ -7238,8 +7190,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_RANLIB+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$RANLIB"; then +else $as_nop + if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -7261,8 +7213,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi RANLIB=$ac_cv_prog_RANLIB if test -n "$RANLIB"; then @@ -7284,8 +7235,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_RANLIB+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_RANLIB"; then +else $as_nop + if test -n "$ac_ct_RANLIB"; then ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -7307,8 +7258,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB if test -n "$ac_ct_RANLIB"; then @@ -7347,8 +7297,15 @@ old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then + case $host_os in + bitrig* | openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" - old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" fi case $host_os in @@ -7412,8 +7369,8 @@ printf %s "checking command to parse $NM output from $compiler object... " >&6; if test ${lt_cv_sys_global_symbol_pipe+y} then : printf %s "(cached) " >&6 -else case e in #( - e) +else $as_nop + # These are sane defaults that work on at least a few old systems. # [They come from Ultrix. What could be older than Ultrix?!! 
;)] @@ -7428,7 +7385,7 @@ case $host_os in aix*) symcode='[BCDT]' ;; -cygwin* | mingw* | windows* | pw32* | cegcc*) +cygwin* | mingw* | pw32* | cegcc*) symcode='[ABCDGISTW]' ;; hpux*) @@ -7443,7 +7400,7 @@ osf*) symcode='[BCDEGQRST]' ;; solaris*) - symcode='[BCDRT]' + symcode='[BDRT]' ;; sco3.2v5*) symcode='[DT]' @@ -7507,7 +7464,7 @@ $lt_c_name_lib_hook\ # Handle CRLF in mingw tool chain opt_cr= case $build_os in -mingw* | windows*) +mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac @@ -7568,11 +7525,8 @@ _LT_EOF test $ac_status = 0; }; then # Now try to grab the symbols. nlist=conftest.nm - if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 - (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && test -s "$nlist"; then + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&5 + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&5 && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" @@ -7668,8 +7622,7 @@ _LT_EOF lt_cv_sys_global_symbol_pipe= fi done - ;; -esac + fi if test -z "$lt_cv_sys_global_symbol_pipe"; then @@ -7733,9 +7686,8 @@ printf %s "checking for sysroot... " >&6; } if test ${with_sysroot+y} then : withval=$with_sysroot; -else case e in #( - e) with_sysroot=no ;; -esac +else $as_nop + with_sysroot=no fi @@ -7743,9 +7695,7 @@ lt_sysroot= case $with_sysroot in #( yes) if test yes = "$GCC"; then - # Trim trailing / since we'll always append absolute paths and we want - # to avoid //, if only for less confusing output for the user. 
- lt_sysroot=`$CC --print-sysroot 2>/dev/null | $SED 's:/\+$::'` + lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) @@ -7772,8 +7722,8 @@ printf %s "checking for a working dd... " >&6; } if test ${ac_cv_path_lt_DD+y} then : printf %s "(cached) " >&6 -else case e in #( - e) printf 0123456789abcdef0123456789abcdef >conftest.i +else $as_nop + printf 0123456789abcdef0123456789abcdef >conftest.i cat conftest.i conftest.i >conftest2.i : ${lt_DD:=$DD} if test -z "$lt_DD"; then @@ -7809,8 +7759,7 @@ else ac_cv_path_lt_DD=$lt_DD fi -rm -f conftest.i conftest2.i conftest.out ;; -esac +rm -f conftest.i conftest2.i conftest.out fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_lt_DD" >&5 printf "%s\n" "$ac_cv_path_lt_DD" >&6; } @@ -7821,8 +7770,8 @@ printf %s "checking how to truncate binary pipes... " >&6; } if test ${lt_cv_truncate_bin+y} then : printf %s "(cached) " >&6 -else case e in #( - e) printf 0123456789abcdef0123456789abcdef >conftest.i +else $as_nop + printf 0123456789abcdef0123456789abcdef >conftest.i cat conftest.i conftest.i >conftest2.i lt_cv_truncate_bin= if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then @@ -7830,8 +7779,7 @@ if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; the && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" fi rm -f conftest.i conftest2.i conftest.out -test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q" ;; -esac +test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_truncate_bin" >&5 printf "%s\n" "$lt_cv_truncate_bin" >&6; } @@ -7962,7 +7910,7 @@ mips64*-*linux*) ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ -s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. 
Note that the listed cases only cover the # situations where additional linker options are needed (such as when @@ -7981,7 +7929,7 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; - x86_64-*linux*|x86_64-gnu*) + x86_64-*linux*) case `$FILECMD conftest.o` in *x86-64*) LD="${LD-ld} -m elf32_x86_64" @@ -8010,7 +7958,7 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; - x86_64-*linux*|x86_64-gnu*) + x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; powerpcle-*linux*) @@ -8041,8 +7989,8 @@ printf %s "checking whether the C compiler needs -belf... " >&6; } if test ${lt_cv_cc_needs_belf+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_ext=c +else $as_nop + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' @@ -8062,9 +8010,8 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_cv_cc_needs_belf=yes -else case e in #( - e) lt_cv_cc_needs_belf=no ;; -esac +else $as_nop + lt_cv_cc_needs_belf=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -8073,8 +8020,7 @@ ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 printf "%s\n" "$lt_cv_cc_needs_belf" >&6; } @@ -8132,8 +8078,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_MANIFEST_TOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$MANIFEST_TOOL"; then +else $as_nop + if test -n "$MANIFEST_TOOL"; then ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8155,8 +8101,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL if test -n "$MANIFEST_TOOL"; then @@ -8178,8 +8123,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_MANIFEST_TOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_MANIFEST_TOOL"; then +else $as_nop + if test -n "$ac_ct_MANIFEST_TOOL"; then ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8201,8 +8146,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL if test -n "$ac_ct_MANIFEST_TOOL"; then @@ -8231,23 +8175,22 @@ fi test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 printf %s "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } -if test ${lt_cv_path_manifest_tool+y} +if test ${lt_cv_path_mainfest_tool+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_path_manifest_tool=no +else $as_nop + lt_cv_path_mainfest_tool=no echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out cat conftest.err >&5 if $GREP 'Manifest Tool' conftest.out > /dev/null; then - lt_cv_path_manifest_tool=yes + lt_cv_path_mainfest_tool=yes fi - rm -f conftest* ;; -esac + rm -f conftest* fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_manifest_tool" >&5 -printf "%s\n" "$lt_cv_path_manifest_tool" >&6; } -if test yes != "$lt_cv_path_manifest_tool"; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 +printf "%s\n" "$lt_cv_path_mainfest_tool" >&6; } +if test yes != "$lt_cv_path_mainfest_tool"; then MANIFEST_TOOL=: fi @@ -8266,8 +8209,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_DSYMUTIL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$DSYMUTIL"; then +else $as_nop + if test -n "$DSYMUTIL"; then ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8289,8 +8232,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi DSYMUTIL=$ac_cv_prog_DSYMUTIL if test -n "$DSYMUTIL"; then @@ -8312,8 +8254,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DSYMUTIL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_DSYMUTIL"; then +else $as_nop + if test -n "$ac_ct_DSYMUTIL"; then ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8335,8 +8277,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL if test -n "$ac_ct_DSYMUTIL"; then @@ -8370,8 +8311,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_NMEDIT+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$NMEDIT"; then +else $as_nop + if test -n "$NMEDIT"; then ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8393,8 +8334,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi NMEDIT=$ac_cv_prog_NMEDIT if test -n "$NMEDIT"; then @@ -8416,8 +8356,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_NMEDIT+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_NMEDIT"; then +else $as_nop + if test -n "$ac_ct_NMEDIT"; then ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8439,8 +8379,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT if test -n "$ac_ct_NMEDIT"; then @@ -8474,8 +8413,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_LIPO+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$LIPO"; then +else $as_nop + if test -n "$LIPO"; then ac_cv_prog_LIPO="$LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8497,8 +8436,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi LIPO=$ac_cv_prog_LIPO if test -n "$LIPO"; then @@ -8520,8 +8458,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_LIPO+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_LIPO"; then +else $as_nop + if test -n "$ac_ct_LIPO"; then ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8543,8 +8481,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO if test -n "$ac_ct_LIPO"; then @@ -8578,8 +8515,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_OTOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$OTOOL"; then +else $as_nop + if test -n "$OTOOL"; then ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8601,8 +8538,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi OTOOL=$ac_cv_prog_OTOOL if test -n "$OTOOL"; then @@ -8624,8 +8560,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OTOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_OTOOL"; then +else $as_nop + if test -n "$ac_ct_OTOOL"; then ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8647,8 +8583,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL if test -n "$ac_ct_OTOOL"; then @@ -8682,8 +8617,8 @@ printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_OTOOL64+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$OTOOL64"; then +else $as_nop + if test -n "$OTOOL64"; then ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8705,8 +8640,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi OTOOL64=$ac_cv_prog_OTOOL64 if test -n "$OTOOL64"; then @@ -8728,8 +8662,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OTOOL64+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_OTOOL64"; then +else $as_nop + if test -n "$ac_ct_OTOOL64"; then ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -8751,8 +8685,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 if test -n "$ac_ct_OTOOL64"; then @@ -8809,8 +8742,8 @@ printf %s "checking for -single_module linker flag... " >&6; } if test ${lt_cv_apple_cc_single_mod+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_apple_cc_single_mod=no +else $as_nop + lt_cv_apple_cc_single_mod=no if test -z "$LT_MULTI_MODULE"; then # By default we will add the -single_module flag. You can override # by either setting the environment variable LT_MULTI_MODULE @@ -8836,8 +8769,7 @@ else case e in #( fi rm -rf libconftest.dylib* rm -f conftest.* - fi ;; -esac + fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 printf "%s\n" "$lt_cv_apple_cc_single_mod" >&6; } @@ -8847,8 +8779,8 @@ printf %s "checking for -exported_symbols_list linker flag... 
" >&6; } if test ${lt_cv_ld_exported_symbols_list+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_ld_exported_symbols_list=no +else $as_nop + lt_cv_ld_exported_symbols_list=no save_LDFLAGS=$LDFLAGS echo "_main" > conftest.sym LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" @@ -8866,15 +8798,13 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_cv_ld_exported_symbols_list=yes -else case e in #( - e) lt_cv_ld_exported_symbols_list=no ;; -esac +else $as_nop + lt_cv_ld_exported_symbols_list=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 printf "%s\n" "$lt_cv_ld_exported_symbols_list" >&6; } @@ -8884,8 +8814,8 @@ printf %s "checking for -force_load linker flag... " >&6; } if test ${lt_cv_ld_force_load+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_ld_force_load=no +else $as_nop + lt_cv_ld_force_load=no cat > conftest.c << _LT_EOF int forced_loaded() { return 2;} _LT_EOF @@ -8910,8 +8840,7 @@ _LT_EOF fi rm -f conftest.err libconftest.a conftest conftest.c rm -rf conftest.dSYM - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 printf "%s\n" "$lt_cv_ld_force_load" >&6; } @@ -8996,7 +8925,7 @@ fi enable_win32_dll=yes case $host in -*-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-cegcc*) +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args. set dummy ${ac_tool_prefix}as; ac_word=$2 @@ -9005,8 +8934,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AS+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$AS"; then +else $as_nop + if test -n "$AS"; then ac_cv_prog_AS="$AS" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -9028,8 +8957,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi AS=$ac_cv_prog_AS if test -n "$AS"; then @@ -9051,8 +8979,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_AS+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_AS"; then +else $as_nop + if test -n "$ac_ct_AS"; then ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -9074,8 +9002,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_AS=$ac_cv_prog_ac_ct_AS if test -n "$ac_ct_AS"; then @@ -9109,8 +9036,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_DLLTOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$DLLTOOL"; then +else $as_nop + if test -n "$DLLTOOL"; then ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -9132,8 +9059,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi DLLTOOL=$ac_cv_prog_DLLTOOL if test -n "$DLLTOOL"; then @@ -9155,8 +9081,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DLLTOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_DLLTOOL"; then +else $as_nop + if test -n "$ac_ct_DLLTOOL"; then ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -9178,8 +9104,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL if test -n "$ac_ct_DLLTOOL"; then @@ -9213,8 +9138,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_OBJDUMP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$OBJDUMP"; then +else $as_nop + if test -n "$OBJDUMP"; then ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -9236,8 +9161,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi OBJDUMP=$ac_cv_prog_OBJDUMP if test -n "$OBJDUMP"; then @@ -9259,8 +9183,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OBJDUMP+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ac_ct_OBJDUMP"; then +else $as_nop + if test -n "$ac_ct_OBJDUMP"; then ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -9282,8 +9206,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP if test -n "$ac_ct_OBJDUMP"; then @@ -9356,9 +9279,8 @@ then : IFS=$lt_save_ifs ;; esac -else case e in #( - e) enable_shared=yes ;; -esac +else $as_nop + enable_shared=yes fi @@ -9389,9 +9311,8 @@ then : IFS=$lt_save_ifs ;; esac -else case e in #( - e) enable_static=yes ;; -esac +else $as_nop + enable_static=yes fi @@ -9422,9 +9343,8 @@ then : IFS=$lt_save_ifs ;; esac -else case e in #( - e) pic_mode=default ;; -esac +else $as_nop + pic_mode=default fi @@ -9454,9 +9374,8 @@ then : IFS=$lt_save_ifs ;; esac -else case e in #( - e) enable_fast_install=yes ;; -esac +else $as_nop + enable_fast_install=yes fi @@ -9483,17 +9402,15 @@ then : ;; esac lt_cv_with_aix_soname=$with_aix_soname -else case e in #( - e) if test ${lt_cv_with_aix_soname+y} +else $as_nop + if test ${lt_cv_with_aix_soname+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_with_aix_soname=aix ;; -esac +else $as_nop + lt_cv_with_aix_soname=aix fi - with_aix_soname=$lt_cv_with_aix_soname ;; -esac + with_aix_soname=$lt_cv_with_aix_soname fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_aix_soname" >&5 @@ -9584,8 +9501,8 @@ printf %s "checking for objdir... 
" >&6; } if test ${lt_cv_objdir+y} then : printf %s "(cached) " >&6 -else case e in #( - e) rm -f .libs 2>/dev/null +else $as_nop + rm -f .libs 2>/dev/null mkdir .libs 2>/dev/null if test -d .libs; then lt_cv_objdir=.libs @@ -9593,8 +9510,7 @@ else # MS-DOS does not allow filenames that begin with a dot. lt_cv_objdir=_libs fi -rmdir .libs 2>/dev/null ;; -esac +rmdir .libs 2>/dev/null fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 printf "%s\n" "$lt_cv_objdir" >&6; } @@ -9655,8 +9571,8 @@ printf %s "checking for ${ac_tool_prefix}file... " >&6; } if test ${lt_cv_path_MAGIC_CMD+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $MAGIC_CMD in +else $as_nop + case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. ;; @@ -9699,7 +9615,6 @@ _LT_EOF IFS=$lt_save_ifs MAGIC_CMD=$lt_save_MAGIC_CMD ;; -esac ;; esac fi @@ -9723,8 +9638,8 @@ printf %s "checking for file... " >&6; } if test ${lt_cv_path_MAGIC_CMD+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $MAGIC_CMD in +else $as_nop + case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. ;; @@ -9767,7 +9682,6 @@ _LT_EOF IFS=$lt_save_ifs MAGIC_CMD=$lt_save_MAGIC_CMD ;; -esac ;; esac fi @@ -9867,8 +9781,8 @@ printf %s "checking if $compiler supports -fno-rtti -fno-exceptions... 
" >&6; } if test ${lt_cv_prog_compiler_rtti_exceptions+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_prog_compiler_rtti_exceptions=no +else $as_nop + lt_cv_prog_compiler_rtti_exceptions=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-fno-rtti -fno-exceptions" ## exclude from sc_useless_quotes_in_assignment @@ -9896,8 +9810,7 @@ else case e in #( fi fi $RM conftest* - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 printf "%s\n" "$lt_cv_prog_compiler_rtti_exceptions" >&6; } @@ -9953,7 +9866,7 @@ lt_prog_compiler_static= # PIC is the default for these OSes. ;; - mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style @@ -10056,7 +9969,7 @@ lt_prog_compiler_static= esac ;; - mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic='-DDLL_EXPORT' @@ -10097,8 +10010,8 @@ lt_prog_compiler_static= lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-static' ;; - *flang) - # Flang compiler. + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' @@ -10268,9 +10181,8 @@ printf %s "checking for $compiler option to produce PIC... 
" >&6; } if test ${lt_cv_prog_compiler_pic+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_prog_compiler_pic=$lt_prog_compiler_pic ;; -esac +else $as_nop + lt_cv_prog_compiler_pic=$lt_prog_compiler_pic fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic" >&6; } @@ -10285,8 +10197,8 @@ printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; if test ${lt_cv_prog_compiler_pic_works+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_prog_compiler_pic_works=no +else $as_nop + lt_cv_prog_compiler_pic_works=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic -DPIC" ## exclude from sc_useless_quotes_in_assignment @@ -10314,8 +10226,7 @@ else case e in #( fi fi $RM conftest* - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_works" >&6; } @@ -10351,8 +10262,8 @@ printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; if test ${lt_cv_prog_compiler_static_works+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_prog_compiler_static_works=no +else $as_nop + lt_cv_prog_compiler_static_works=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext @@ -10373,8 +10284,7 @@ else case e in #( fi $RM -r conftest* LDFLAGS=$save_LDFLAGS - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 printf "%s\n" "$lt_cv_prog_compiler_static_works" >&6; } @@ -10396,8 +10306,8 @@ printf %s "checking if $compiler supports -c -o file.$ac_objext... 
" >&6; } if test ${lt_cv_prog_compiler_c_o+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_prog_compiler_c_o=no +else $as_nop + lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest @@ -10437,8 +10347,7 @@ else case e in #( cd .. $RM -r conftest $RM conftest* - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } @@ -10453,8 +10362,8 @@ printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_prog_compiler_c_o=no +else $as_nop + lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest @@ -10494,8 +10403,7 @@ else case e in #( cd .. $RM -r conftest $RM conftest* - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } @@ -10574,7 +10482,7 @@ printf %s "checking whether the $compiler linker ($LD) supports shared libraries extract_expsyms_cmds= case $host_os in - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. @@ -10586,9 +10494,12 @@ printf %s "checking whether the $compiler linker ($LD) supports shared libraries # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; - openbsd*) + openbsd* | bitrig*) with_gnu_ld=no ;; + linux* | k*bsd*-gnu | gnu*) + link_all_deplibs=no + ;; esac ld_shlibs=yes @@ -10689,7 +10600,7 @@ _LT_EOF fi ;; - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, # as there is no search path for DLLs. 
hardcode_libdir_flag_spec='-L$libdir' @@ -10745,7 +10656,7 @@ _LT_EOF cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' - old_archive_from_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes=yes file_list_spec='@' ;; @@ -10824,6 +10735,7 @@ _LT_EOF case $cc_basename in tcc*) + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' export_dynamic_flag_spec='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) @@ -10844,7 +10756,7 @@ _LT_EOF fi ;; - netbsd*) + netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= @@ -11090,8 +11002,8 @@ else if test ${lt_cv_aix_libpath_+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int @@ -11123,8 +11035,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam \ if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=/usr/lib:/lib fi - ;; -esac + fi aix_libpath=$lt_cv_aix_libpath_ @@ -11146,8 +11057,8 @@ else if test ${lt_cv_aix_libpath_+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int @@ -11179,8 +11090,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam \ if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=/usr/lib:/lib fi - ;; -esac + fi aix_libpath=$lt_cv_aix_libpath_ @@ -11236,7 +11146,7 @@ fi export_dynamic_flag_spec=-rdynamic ;; - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is @@ -11253,14 +11163,14 @@ fi # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. - archive_cmds='$CC -Fe $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ - $CC -Fe $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, )='true' @@ -11431,8 +11341,8 @@ printf %s "checking if $CC understands -b... 
" >&6; } if test ${lt_cv_prog_compiler__b+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_prog_compiler__b=no +else $as_nop + lt_cv_prog_compiler__b=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -b" echo "$lt_simple_link_test_code" > conftest.$ac_ext @@ -11453,8 +11363,7 @@ else case e in #( fi $RM -r conftest* LDFLAGS=$save_LDFLAGS - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 printf "%s\n" "$lt_cv_prog_compiler__b" >&6; } @@ -11502,8 +11411,8 @@ printf %s "checking whether the $host_os linker accepts -exported_symbol... " >& if test ${lt_cv_irix_exported_symbol+y} then : printf %s "(cached) " >&6 -else case e in #( - e) save_LDFLAGS=$LDFLAGS +else $as_nop + save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -11512,20 +11421,19 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_cv_irix_exported_symbol=yes -else case e in #( - e) lt_cv_irix_exported_symbol=no ;; -esac +else $as_nop + lt_cv_irix_exported_symbol=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext - LDFLAGS=$save_LDFLAGS ;; -esac + LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } if test yes = "$lt_cv_irix_exported_symbol"; then archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' fi + link_all_deplibs=no else archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds='$CC -shared 
$libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' @@ -11543,11 +11451,12 @@ printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } # Fabrice Bellard et al's Tiny C Compiler ld_shlibs=yes archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' ;; esac ;; - netbsd*) + netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else @@ -11569,7 +11478,7 @@ printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } *nto* | *qnx*) ;; - openbsd*) + openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then hardcode_direct=yes hardcode_shlibpath_var=no @@ -11612,7 +11521,7 @@ printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' - old_archive_from_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes=yes file_list_spec='@' ;; @@ -11845,8 +11754,8 @@ printf %s "checking whether -lc should be explicitly linked in... 
" >&6; } if test ${lt_cv_archive_cmds_need_lc+y} then : printf %s "(cached) " >&6 -else case e in #( - e) $RM conftest* +else $as_nop + $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 @@ -11882,8 +11791,7 @@ else case e in #( cat conftest.err 1>&5 fi $RM conftest* - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 printf "%s\n" "$lt_cv_archive_cmds_need_lc" >&6; } @@ -12054,7 +11962,7 @@ if test yes = "$GCC"; then *) lt_awk_arg='/^libraries:/' ;; esac case $host_os in - mingw* | windows* | cegcc*) lt_sed_strip_eq='s|=\([A-Za-z]:\)|\1|g' ;; + mingw* | cegcc*) lt_sed_strip_eq='s|=\([A-Za-z]:\)|\1|g' ;; *) lt_sed_strip_eq='s|=/|/|g' ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` @@ -12112,7 +12020,7 @@ BEGIN {RS = " "; FS = "/|\n";} { # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. 
case $host_os in - mingw* | windows* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's|/\([A-Za-z]:\)|\1|g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` @@ -12280,7 +12188,7 @@ bsdi[45]*) # libtool to hard-code these into programs ;; -cygwin* | mingw* | windows* | pw32* | cegcc*) +cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=.dll need_version=no @@ -12312,7 +12220,7 @@ cygwin* | mingw* | windows* | pw32* | cegcc*) sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" ;; - mingw* | windows* | cegcc*) + mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; @@ -12331,7 +12239,7 @@ cygwin* | mingw* | windows* | pw32* | cegcc*) library_names_spec='$libname.dll.lib' case $build_os in - mingw* | windows*) + mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' @@ -12579,7 +12487,7 @@ linux*android*) version_type=none # Android doesn't support versioned libraries. need_lib_prefix=no need_version=no - library_names_spec='$libname$release$shared_ext $libname$shared_ext' + library_names_spec='$libname$release$shared_ext' soname_spec='$libname$release$shared_ext' finish_cmds= shlibpath_var=LD_LIBRARY_PATH @@ -12591,9 +12499,8 @@ linux*android*) hardcode_into_libs=yes dynamic_linker='Android linker' - # -rpath works at least for libraries that are not overridden by - # libraries installed in system locations. - hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec='-L$libdir' ;; # This must be glibc/ELF. 
@@ -12611,8 +12518,8 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) if test ${lt_cv_shlibpath_overrides_runpath+y} then : printf %s "(cached) " >&6 -else case e in #( - e) lt_cv_shlibpath_overrides_runpath=no +else $as_nop + lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ @@ -12639,8 +12546,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir - ;; -esac + fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath @@ -12650,7 +12556,7 @@ fi # before this can be enabled. hardcode_into_libs=yes - # Ideally, we could use ldconfig to report *all* directories which are + # Ideally, we could use ldconfig to report *all* directores which are # searched for libraries, however this is still not possible. Aside from not # being certain /sbin/ldconfig is available, command # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, @@ -12670,6 +12576,18 @@ fi dynamic_linker='GNU/Linux ld.so' ;; +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + netbsd*) version_type=sunos need_lib_prefix=no @@ -12707,7 +12625,7 @@ newsos6) dynamic_linker='ldqnx.so' ;; -openbsd*) +openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec=/usr/lib need_lib_prefix=no @@ -13048,7 +12966,7 @@ else lt_cv_dlopen_self=yes ;; - mingw* | windows* | pw32* | cegcc*) + mingw* | pw32* | cegcc*) lt_cv_dlopen=LoadLibrary lt_cv_dlopen_libs= ;; @@ -13065,22 +12983,16 @@ printf %s "checking for dlopen in -ldl... 
" >&6; } if test ${ac_cv_lib_dl_dlopen+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char dlopen (void); + builtin and then its argument prototype would still apply. */ +char dlopen (); int main (void) { @@ -13092,27 +13004,24 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dl_dlopen=yes -else case e in #( - e) ac_cv_lib_dl_dlopen=no ;; -esac +else $as_nop + ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes then : lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl -else case e in #( - e) +else $as_nop + lt_cv_dlopen=dyld lt_cv_dlopen_libs= lt_cv_dlopen_self=yes - ;; -esac + fi ;; @@ -13130,28 +13039,22 @@ fi if test "x$ac_cv_func_shl_load" = xyes then : lt_cv_dlopen=shl_load -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 printf %s "checking for shl_load in -ldld... 
" >&6; } if test ${ac_cv_lib_dld_shl_load+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char shl_load (void); + builtin and then its argument prototype would still apply. */ +char shl_load (); int main (void) { @@ -13163,47 +13066,39 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dld_shl_load=yes -else case e in #( - e) ac_cv_lib_dld_shl_load=no ;; -esac +else $as_nop + ac_cv_lib_dld_shl_load=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 printf "%s\n" "$ac_cv_lib_dld_shl_load" >&6; } if test "x$ac_cv_lib_dld_shl_load" = xyes then : lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld -else case e in #( - e) ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +else $as_nop + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" if test "x$ac_cv_func_dlopen" = xyes then : lt_cv_dlopen=dlopen -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 printf %s "checking for dlopen in -ldl... 
" >&6; } if test ${ac_cv_lib_dl_dlopen+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char dlopen (void); + builtin and then its argument prototype would still apply. */ +char dlopen (); int main (void) { @@ -13215,42 +13110,34 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dl_dlopen=yes -else case e in #( - e) ac_cv_lib_dl_dlopen=no ;; -esac +else $as_nop + ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes then : lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 printf %s "checking for dlopen in -lsvld... " >&6; } if test ${ac_cv_lib_svld_dlopen+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lsvld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. 
Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char dlopen (void); + builtin and then its argument prototype would still apply. */ +char dlopen (); int main (void) { @@ -13262,42 +13149,34 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_svld_dlopen=yes -else case e in #( - e) ac_cv_lib_svld_dlopen=no ;; -esac +else $as_nop + ac_cv_lib_svld_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 printf "%s\n" "$ac_cv_lib_svld_dlopen" >&6; } if test "x$ac_cv_lib_svld_dlopen" = xyes then : lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 printf %s "checking for dld_link in -ldld... " >&6; } if test ${ac_cv_lib_dld_dld_link+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). 
*/ -#ifdef __cplusplus -extern "C" -#endif -char dld_link (void); + builtin and then its argument prototype would still apply. */ +char dld_link (); int main (void) { @@ -13309,14 +13188,12 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dld_dld_link=yes -else case e in #( - e) ac_cv_lib_dld_dld_link=no ;; -esac +else $as_nop + ac_cv_lib_dld_dld_link=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 printf "%s\n" "$ac_cv_lib_dld_dld_link" >&6; } @@ -13325,24 +13202,19 @@ then : lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld fi - ;; -esac + fi - ;; -esac + fi - ;; -esac + fi - ;; -esac + fi - ;; -esac + fi ;; @@ -13370,8 +13242,8 @@ printf %s "checking whether a program can dlopen itself... " >&6; } if test ${lt_cv_dlopen_self+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test yes = "$cross_compiling"; then : +else $as_nop + if test yes = "$cross_compiling"; then : lt_cv_dlopen_self=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 @@ -13465,8 +13337,7 @@ _LT_EOF fi rm -fr conftest* - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 printf "%s\n" "$lt_cv_dlopen_self" >&6; } @@ -13478,8 +13349,8 @@ printf %s "checking whether a statically linked program can dlopen itself... 
" > if test ${lt_cv_dlopen_self_static+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test yes = "$cross_compiling"; then : +else $as_nop + if test yes = "$cross_compiling"; then : lt_cv_dlopen_self_static=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 @@ -13573,8 +13444,7 @@ _LT_EOF fi rm -fr conftest* - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 printf "%s\n" "$lt_cv_dlopen_self_static" >&6; } @@ -13757,34 +13627,31 @@ if test ${enable_largefile+y} then : enableval=$enable_largefile; fi -if test "$enable_largefile,$enable_year2038" != no,no -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable large file support" >&5 -printf %s "checking for $CC option to enable large file support... " >&6; } -if test ${ac_cv_sys_largefile_opts+y} + +if test "$enable_largefile" != no; then + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for special C compiler options needed for large files" >&5 +printf %s "checking for special C compiler options needed for large files... " >&6; } +if test ${ac_cv_sys_largefile_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_save_CC="$CC" - ac_opt_found=no - for ac_opt in "none needed" "-D_FILE_OFFSET_BITS=64" "-D_LARGE_FILES=1" "-n32"; do - if test x"$ac_opt" != x"none needed" -then : - CC="$ac_save_CC $ac_opt" -fi - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + ac_cv_sys_largefile_CC=no + if test "$GCC" != yes; then + ac_save_CC=$CC + while :; do + # IRIX 6.2 and later do not support large files by default, + # so use the C compiler's -n32 option if that helps. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include -#ifndef FTYPE -# define FTYPE off_t -#endif - /* Check that FTYPE can represent 2**63 - 1 correctly. - We can't simply define LARGE_FTYPE to be 9223372036854775807, + /* Check that off_t can represent 2**63 - 1 correctly. 
+ We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_FTYPE (((FTYPE) 1 << 31 << 31) - 1 + ((FTYPE) 1 << 31 << 31)) - int FTYPE_is_large[(LARGE_FTYPE % 2147483629 == 721 - && LARGE_FTYPE % 2147483647 == 1) +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; int main (void) @@ -13794,88 +13661,47 @@ main (void) return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - if test x"$ac_opt" = x"none needed" -then : - # GNU/Linux s390x and alpha need _FILE_OFFSET_BITS=64 for wide ino_t. - CC="$CC -DFTYPE=ino_t" if ac_fn_c_try_compile "$LINENO" then : - -else case e in #( - e) CC="$CC -D_FILE_OFFSET_BITS=64" - if ac_fn_c_try_compile "$LINENO" -then : - ac_opt='-D_FILE_OFFSET_BITS=64' -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam ;; -esac + break fi rm -f core conftest.err conftest.$ac_objext conftest.beam + CC="$CC -n32" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_largefile_CC=' -n32'; break fi - ac_cv_sys_largefile_opts=$ac_opt - ac_opt_found=yes -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - test $ac_opt_found = no || break - done - CC="$ac_save_CC" - - test $ac_opt_found = yes || ac_cv_sys_largefile_opts="support not detected" ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam + break + done + CC=$ac_save_CC + rm -f conftest.$ac_ext + fi fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_opts" >&5 -printf "%s\n" "$ac_cv_sys_largefile_opts" >&6; } - -ac_have_largefile=yes -case $ac_cv_sys_largefile_opts in #( - "none needed") : - ;; #( - "supported through gnulib") : - ;; #( - "support not detected") : - ac_have_largefile=no ;; #( - "-D_FILE_OFFSET_BITS=64") : - -printf "%s\n" "#define _FILE_OFFSET_BITS 64" 
>>confdefs.h - ;; #( - "-D_LARGE_FILES=1") : - -printf "%s\n" "#define _LARGE_FILES 1" >>confdefs.h - ;; #( - "-n32") : - CC="$CC -n32" ;; #( - *) : - as_fn_error $? "internal error: bad value for \$ac_cv_sys_largefile_opts" "$LINENO" 5 ;; -esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_CC" >&5 +printf "%s\n" "$ac_cv_sys_largefile_CC" >&6; } + if test "$ac_cv_sys_largefile_CC" != no; then + CC=$CC$ac_cv_sys_largefile_CC + fi -if test "$enable_year2038" != no -then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option for timestamps after 2038" >&5 -printf %s "checking for $CC option for timestamps after 2038... " >&6; } -if test ${ac_cv_sys_year2038_opts+y} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for _FILE_OFFSET_BITS value needed for large files" >&5 +printf %s "checking for _FILE_OFFSET_BITS value needed for large files... " >&6; } +if test ${ac_cv_sys_file_offset_bits+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_save_CPPFLAGS="$CPPFLAGS" - ac_opt_found=no - for ac_opt in "none needed" "-D_TIME_BITS=64" "-D__MINGW_USE_VC2005_COMPAT" "-U_USE_32_BIT_TIME_T -D__MINGW_USE_VC2005_COMPAT"; do - if test x"$ac_opt" != x"none needed" -then : - CPPFLAGS="$ac_save_CPPFLAGS $ac_opt" -fi - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ - - #include - /* Check that time_t can represent 2**32 - 1 correctly. */ - #define LARGE_TIME_T \\ - ((time_t) (((time_t) 1 << 30) - 1 + 3 * ((time_t) 1 << 30))) - int verify_time_t_range[(LARGE_TIME_T / 65537 == 65535 - && LARGE_TIME_T % 65537 == 0) - ? 1 : -1]; - +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. 
*/ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; int main (void) { @@ -13886,68 +13712,21 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ac_cv_sys_year2038_opts="$ac_opt" - ac_opt_found=yes + ac_cv_sys_file_offset_bits=no; break fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - test $ac_opt_found = no || break - done - CPPFLAGS="$ac_save_CPPFLAGS" - test $ac_opt_found = yes || ac_cv_sys_year2038_opts="support not detected" ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_year2038_opts" >&5 -printf "%s\n" "$ac_cv_sys_year2038_opts" >&6; } - -ac_have_year2038=yes -case $ac_cv_sys_year2038_opts in #( - "none needed") : - ;; #( - "support not detected") : - ac_have_year2038=no ;; #( - "-D_TIME_BITS=64") : - -printf "%s\n" "#define _TIME_BITS 64" >>confdefs.h - ;; #( - "-D__MINGW_USE_VC2005_COMPAT") : - -printf "%s\n" "#define __MINGW_USE_VC2005_COMPAT 1" >>confdefs.h - ;; #( - "-U_USE_32_BIT_TIME_T"*) : - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "the 'time_t' type is currently forced to be 32-bit. It -will stop working after mid-January 2038. Remove -_USE_32BIT_TIME_T from the compiler flags. -See 'config.log' for more details" "$LINENO" 5; } ;; #( - *) : - as_fn_error $? "internal error: bad value for \$ac_cv_sys_year2038_opts" "$LINENO" 5 ;; -esac - -fi - -fi - -# Check for GCC visibility feature - - - - VISIBILITY_CFLAGS= - VISIBILITY_CXXFLAGS= - HAVE_VISIBILITY=0 - if test -n "$GCC"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the -Werror option is usable" >&5 -printf %s "checking whether the -Werror option is usable... 
" >&6; } - if test ${pcre2_cv_cc_vis_werror+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -Werror" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ - +#define _FILE_OFFSET_BITS 64 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; int main (void) { @@ -13958,38 +13737,41 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - pcre2_cv_cc_vis_werror=yes -else case e in #( - e) pcre2_cv_cc_vis_werror=no ;; -esac + ac_cv_sys_file_offset_bits=64; break fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS="$pcre2_save_CFLAGS" ;; -esac + ac_cv_sys_file_offset_bits=unknown + break +done fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_vis_werror" >&5 -printf "%s\n" "$pcre2_cv_cc_vis_werror" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for simple visibility declarations" >&5 -printf %s "checking for simple visibility declarations... 
" >&6; } - if test ${pcre2_cv_cc_visibility+y} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_file_offset_bits" >&5 +printf "%s\n" "$ac_cv_sys_file_offset_bits" >&6; } +case $ac_cv_sys_file_offset_bits in #( + no | unknown) ;; + *) +printf "%s\n" "#define _FILE_OFFSET_BITS $ac_cv_sys_file_offset_bits" >>confdefs.h +;; +esac +rm -rf conftest* + if test $ac_cv_sys_file_offset_bits = unknown; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for _LARGE_FILES value needed for large files" >&5 +printf %s "checking for _LARGE_FILES value needed for large files... " >&6; } +if test ${ac_cv_sys_large_files+y} then : printf %s "(cached) " >&6 -else case e in #( - e) - pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -fvisibility=hidden" - if test $pcre2_cv_cc_vis_werror = yes; then - CFLAGS="$CFLAGS -Werror" - fi - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -extern __attribute__((__visibility__("hidden"))) int hiddenvar; - extern __attribute__((__visibility__("default"))) int exportedvar; - extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); - extern __attribute__((__visibility__("default"))) int exportedfunc (void); - void dummyfunc (void) {} - +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 
1 : -1]; int main (void) { @@ -14000,36 +13782,163 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - pcre2_cv_cc_visibility=yes -else case e in #( - e) pcre2_cv_cc_visibility=no ;; -esac + ac_cv_sys_large_files=no; break fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS="$pcre2_save_CFLAGS" ;; -esac -fi - - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_visibility" >&5 -printf "%s\n" "$pcre2_cv_cc_visibility" >&6; } - if test $pcre2_cv_cc_visibility = yes; then - VISIBILITY_CFLAGS="-fvisibility=hidden" - VISIBILITY_CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden" - HAVE_VISIBILITY=1 - -printf "%s\n" "#define PCRE2_EXPORT __attribute__ ((visibility (\"default\")))" >>confdefs.h - - else - -printf "%s\n" "#define PCRE2_EXPORT /**/" >>confdefs.h + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _LARGE_FILES 1 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 
1 : -1]; +int +main (void) +{ - fi + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_large_files=1; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_cv_sys_large_files=unknown + break +done +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_large_files" >&5 +printf "%s\n" "$ac_cv_sys_large_files" >&6; } +case $ac_cv_sys_large_files in #( + no | unknown) ;; + *) +printf "%s\n" "#define _LARGE_FILES $ac_cv_sys_large_files" >>confdefs.h +;; +esac +rm -rf conftest* + fi +fi + + +# Check for GCC visibility feature + + + + VISIBILITY_CFLAGS= + HAVE_VISIBILITY=0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the -Werror option is usable" >&5 +printf %s "checking whether the -Werror option is usable... " >&6; } + if test ${pcre2_cv_cc_vis_werror+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + pcre2_cv_cc_vis_werror=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + #warning e + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + pcre2_cv_cc_vis_werror=yes + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS="$pcre2_save_CFLAGS" +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_vis_werror" >&5 +printf "%s\n" "$pcre2_cv_cc_vis_werror" >&6; } + if test -n "$pcre2_cv_cc_vis_werror" && test $pcre2_cv_cc_vis_werror = yes + then + WORKING_WERROR=1 else + WORKING_WERROR=0 + fi + if test $pcre2_cv_cc_vis_werror = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GCC compatible visibility declarations" >&5 +printf %s "checking for GCC compatible visibility declarations... " >&6; } + if test ${pcre2_cv_cc_visibility+y} +then : + printf %s "(cached) " >&6 +else $as_nop -printf "%s\n" "#define PCRE2_EXPORT /**/" >>confdefs.h + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror -fvisibility=hidden" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); + extern __attribute__((__visibility__("default"))) int exportedfunc (void); + void dummyfunc (void) {} + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cv_cc_visibility=yes +else $as_nop + pcre2_cv_cc_visibility=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS="$pcre2_save_CFLAGS" +fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_visibility" >&5 +printf "%s\n" "$pcre2_cv_cc_visibility" >&6; } fi + if test -n "$pcre2_cv_cc_visibility" && test $pcre2_cv_cc_visibility = yes + then + VISIBILITY_CFLAGS="-fvisibility=hidden" + HAVE_VISIBILITY=1 + +printf "%s\n" "#define PCRE2_EXPORT __attribute__ ((visibility (\"default\")))" >>confdefs.h + else + +printf "%s\n" "#define PCRE2_EXPORT /**/" >>confdefs.h + + fi @@ -14037,6 +13946,96 @@ printf "%s\n" "#define HAVE_VISIBILITY $HAVE_VISIBILITY" >>confdefs.h +# Check for Clang __attribute__((uninitialized)) feature + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((uninitialized))" >&5 +printf %s "checking for __attribute__((uninitialized))... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +tmp_CFLAGS=$CFLAGS +if test $WORKING_WERROR -eq 1; then + CFLAGS="$CFLAGS -Werror" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +char buf[128] __attribute__((uninitialized));(void)buf + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cc_cv_attribute_uninitialized=yes +else $as_nop + pcre2_cc_cv_attribute_uninitialized=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_attribute_uninitialized" >&5 +printf "%s\n" "$pcre2_cc_cv_attribute_uninitialized" >&6; } +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + +printf "%s\n" "#define HAVE_ATTRIBUTE_UNINITIALIZED 1" >>confdefs.h + +fi +CFLAGS=$tmp_CFLAGS +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Check for the assume() builtin + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __assume()" >&5 +printf %s "checking for __assume()... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +__assume(1) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pcre2_cc_cv_builtin_assume=yes +else $as_nop + pcre2_cc_cv_builtin_assume=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_assume" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_assume" >&6; } +if test "$pcre2_cc_cv_builtin_assume" = yes; then + +printf "%s\n" "#define HAVE_BUILTIN_ASSUME 1" >>confdefs.h + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + # Check for the mul_overflow() builtin { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_mul_overflow()" >&5 @@ -14069,9 +14068,8 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : pcre2_cc_cv_builtin_mul_overflow=yes -else case e in #( - e) pcre2_cc_cv_builtin_mul_overflow=no ;; -esac +else $as_nop + pcre2_cc_cv_builtin_mul_overflow=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -14089,45 +14087,42 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu -# Check for Clang __attribute__((uninitialized)) feature +# Check for the unreachable() builtin -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((uninitialized))" >&5 -printf %s "checking for __attribute__((uninitialized))... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable()" >&5 +printf %s "checking for __builtin_unreachable()... 
" >&6; } ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -tmp_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -Werror" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ - +int r; int main (void) { -char buf[128] __attribute__((uninitialized));(void)buf +if (r) __builtin_unreachable() ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO" +if ac_fn_c_try_link "$LINENO" then : - pcre2_cc_cv_attribute_uninitialized=yes -else case e in #( - e) pcre2_cc_cv_attribute_uninitialized=no ;; -esac + pcre2_cc_cv_builtin_unreachable=yes +else $as_nop + pcre2_cc_cv_builtin_unreachable=no fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_attribute_uninitialized" >&5 -printf "%s\n" "$pcre2_cc_cv_attribute_uninitialized" >&6; } -if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_unreachable" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_unreachable" >&6; } +if test "$pcre2_cc_cv_builtin_unreachable" = yes; then -printf "%s\n" "#define HAVE_ATTRIBUTE_UNINITIALIZED 1" >>confdefs.h +printf "%s\n" "#define HAVE_BUILTIN_UNREACHABLE 1" >>confdefs.h fi -CFLAGS=$tmp_CFLAGS ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -14138,9 +14133,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # Versioning PCRE2_MAJOR="10" -PCRE2_MINOR="44" -PCRE2_PRERELEASE="" -PCRE2_DATE="2024-06-07" +PCRE2_MINOR="45" +PCRE2_PRERELEASE="-RC1" +PCRE2_DATE="2024-12-27" if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" then @@ -14167,27 +14162,24 @@ fi if test ${enable_pcre8+y} then : enableval=$enable_pcre8; -else case e in #( - e) 
enable_pcre8=no ;; -esac +else $as_nop + enable_pcre8=no fi # Check whether --enable-pcre16 was given. if test ${enable_pcre16+y} then : enableval=$enable_pcre16; -else case e in #( - e) enable_pcre16=no ;; -esac +else $as_nop + enable_pcre16=no fi # Check whether --enable-pcre32 was given. if test ${enable_pcre32+y} then : enableval=$enable_pcre32; -else case e in #( - e) enable_pcre32=no ;; -esac +else $as_nop + enable_pcre32=no fi @@ -14202,9 +14194,8 @@ fi if test ${enable_pcre2_8+y} then : enableval=$enable_pcre2_8; -else case e in #( - e) enable_pcre2_8=unset ;; -esac +else $as_nop + enable_pcre2_8=unset fi @@ -14214,9 +14205,8 @@ fi if test ${enable_pcre2_16+y} then : enableval=$enable_pcre2_16; -else case e in #( - e) enable_pcre2_16=unset ;; -esac +else $as_nop + enable_pcre2_16=unset fi @@ -14226,9 +14216,8 @@ fi if test ${enable_pcre2_32+y} then : enableval=$enable_pcre2_32; -else case e in #( - e) enable_pcre2_32=unset ;; -esac +else $as_nop + enable_pcre2_32=unset fi @@ -14238,9 +14227,8 @@ fi if test ${enable_debug+y} then : enableval=$enable_debug; -else case e in #( - e) enable_debug=no ;; -esac +else $as_nop + enable_debug=no fi @@ -14249,9 +14237,8 @@ fi if test ${enable_jit+y} then : enableval=$enable_jit; -else case e in #( - e) enable_jit=no ;; -esac +else $as_nop + enable_jit=no fi @@ -14269,7 +14256,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu /* end confdefs.h. 
*/ #define SLJIT_CONFIG_AUTO 1 - #include "src/sljit/sljitConfigCPU.h" + #include "deps/sljit/sljit_src/sljitConfigCPU.h" #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #error unsupported #endif @@ -14277,9 +14264,8 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : enable_jit=yes -else case e in #( - e) enable_jit=no ;; -esac +else $as_nop + enable_jit=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CPPFLAGS=$SAVE_CPPFLAGS @@ -14293,9 +14279,8 @@ case $host_os in if test ${enable_jit_sealloc+y} then : enableval=$enable_jit_sealloc; -else case e in #( - e) enable_jit_sealloc=no ;; -esac +else $as_nop + enable_jit_sealloc=no fi ;; @@ -14309,9 +14294,8 @@ esac if test ${enable_pcre2grep_jit+y} then : enableval=$enable_pcre2grep_jit; -else case e in #( - e) enable_pcre2grep_jit=yes ;; -esac +else $as_nop + enable_pcre2grep_jit=yes fi @@ -14320,9 +14304,8 @@ fi if test ${enable_pcre2grep_callout+y} then : enableval=$enable_pcre2grep_callout; -else case e in #( - e) enable_pcre2grep_callout=yes ;; -esac +else $as_nop + enable_pcre2grep_callout=yes fi @@ -14331,9 +14314,8 @@ fi if test ${enable_pcre2grep_callout_fork+y} then : enableval=$enable_pcre2grep_callout_fork; -else case e in #( - e) enable_pcre2grep_callout_fork=yes ;; -esac +else $as_nop + enable_pcre2grep_callout_fork=yes fi @@ -14342,9 +14324,8 @@ fi if test ${enable_rebuild_chartables+y} then : enableval=$enable_rebuild_chartables; -else case e in #( - e) enable_rebuild_chartables=no ;; -esac +else $as_nop + enable_rebuild_chartables=no fi @@ -14353,9 +14334,8 @@ fi if test ${enable_unicode+y} then : enableval=$enable_unicode; -else case e in #( - e) enable_unicode=unset ;; -esac +else $as_nop + enable_unicode=unset fi @@ -14404,9 +14384,8 @@ enable_newline="$ac_pcre2_newline" if test ${enable_bsr_anycrlf+y} then : enableval=$enable_bsr_anycrlf; -else case e in #( - e) enable_bsr_anycrlf=no ;; -esac +else $as_nop + enable_bsr_anycrlf=no fi @@ -14415,9 
+14394,8 @@ fi if test ${enable_never_backslash_C+y} then : enableval=$enable_never_backslash_C; -else case e in #( - e) enable_never_backslash_C=no ;; -esac +else $as_nop + enable_never_backslash_C=no fi @@ -14426,9 +14404,8 @@ fi if test ${enable_ebcdic+y} then : enableval=$enable_ebcdic; -else case e in #( - e) enable_ebcdic=no ;; -esac +else $as_nop + enable_ebcdic=no fi @@ -14437,9 +14414,8 @@ fi if test ${enable_ebcdic_nl25+y} then : enableval=$enable_ebcdic_nl25; -else case e in #( - e) enable_ebcdic_nl25=no ;; -esac +else $as_nop + enable_ebcdic_nl25=no fi @@ -14448,9 +14424,8 @@ fi if test ${enable_pcre2grep_libz+y} then : enableval=$enable_pcre2grep_libz; -else case e in #( - e) enable_pcre2grep_libz=no ;; -esac +else $as_nop + enable_pcre2grep_libz=no fi @@ -14459,9 +14434,8 @@ fi if test ${enable_pcre2grep_libbz2+y} then : enableval=$enable_pcre2grep_libbz2; -else case e in #( - e) enable_pcre2grep_libbz2=no ;; -esac +else $as_nop + enable_pcre2grep_libbz2=no fi @@ -14471,9 +14445,8 @@ fi if test ${with_pcre2grep_bufsize+y} then : withval=$with_pcre2grep_bufsize; -else case e in #( - e) with_pcre2grep_bufsize=20480 ;; -esac +else $as_nop + with_pcre2grep_bufsize=20480 fi @@ -14483,9 +14456,8 @@ fi if test ${with_pcre2grep_max_bufsize+y} then : withval=$with_pcre2grep_max_bufsize; -else case e in #( - e) with_pcre2grep_max_bufsize=1048576 ;; -esac +else $as_nop + with_pcre2grep_max_bufsize=1048576 fi @@ -14494,9 +14466,8 @@ fi if test ${enable_pcre2test_libedit+y} then : enableval=$enable_pcre2test_libedit; -else case e in #( - e) enable_pcre2test_libedit=no ;; -esac +else $as_nop + enable_pcre2test_libedit=no fi @@ -14505,9 +14476,8 @@ fi if test ${enable_pcre2test_libreadline+y} then : enableval=$enable_pcre2test_libreadline; -else case e in #( - e) enable_pcre2test_libreadline=no ;; -esac +else $as_nop + enable_pcre2test_libreadline=no fi @@ -14517,9 +14487,8 @@ fi if test ${with_link_size+y} then : withval=$with_link_size; -else case e in #( - e) 
with_link_size=2 ;; -esac +else $as_nop + with_link_size=2 fi @@ -14529,9 +14498,8 @@ fi if test ${with_max_varlookbehind+y} then : withval=$with_max_varlookbehind; -else case e in #( - e) with_max_varlookbehind=255 ;; -esac +else $as_nop + with_max_varlookbehind=255 fi @@ -14541,9 +14509,8 @@ fi if test ${with_parens_nest_limit+y} then : withval=$with_parens_nest_limit; -else case e in #( - e) with_parens_nest_limit=250 ;; -esac +else $as_nop + with_parens_nest_limit=250 fi @@ -14553,9 +14520,8 @@ fi if test ${with_heap_limit+y} then : withval=$with_heap_limit; -else case e in #( - e) with_heap_limit=20000000 ;; -esac +else $as_nop + with_heap_limit=20000000 fi @@ -14565,9 +14531,8 @@ fi if test ${with_match_limit+y} then : withval=$with_match_limit; -else case e in #( - e) with_match_limit=10000000 ;; -esac +else $as_nop + with_match_limit=10000000 fi @@ -14585,9 +14550,8 @@ fi if test ${with_match_limit_depth+y} then : withval=$with_match_limit_depth; -else case e in #( - e) with_match_limit_depth=MATCH_LIMIT ;; -esac +else $as_nop + with_match_limit_depth=MATCH_LIMIT fi @@ -14596,9 +14560,8 @@ fi if test ${with_match_limit_recursion+y} then : withval=$with_match_limit_recursion; -else case e in #( - e) with_match_limit_recursion=UNSET ;; -esac +else $as_nop + with_match_limit_recursion=UNSET fi @@ -14607,9 +14570,8 @@ fi if test ${enable_valgrind+y} then : enableval=$enable_valgrind; -else case e in #( - e) enable_valgrind=no ;; -esac +else $as_nop + enable_valgrind=no fi @@ -14618,9 +14580,8 @@ fi if test ${enable_coverage+y} then : enableval=$enable_coverage; -else case e in #( - e) enable_coverage=no ;; -esac +else $as_nop + enable_coverage=no fi @@ -14629,9 +14590,8 @@ fi if test ${enable_fuzz_support+y} then : enableval=$enable_fuzz_support; -else case e in #( - e) enable_fuzz_support=no ;; -esac +else $as_nop + enable_fuzz_support=no fi @@ -14640,9 +14600,8 @@ fi if test ${enable_diff_fuzz_support+y} then : enableval=$enable_diff_fuzz_support; -else case 
e in #( - e) enable_diff_fuzz_support=no ;; -esac +else $as_nop + enable_diff_fuzz_support=no fi @@ -14652,9 +14611,8 @@ fi if test ${enable_stack_for_recursion+y} then : enableval=$enable_stack_for_recursion; -else case e in #( - e) enable_stack_for_recursion=yes ;; -esac +else $as_nop + enable_stack_for_recursion=yes fi @@ -14669,9 +14627,8 @@ fi if test ${enable_percent_zt+y} then : enableval=$enable_percent_zt; -else case e in #( - e) enable_percent_zt=auto ;; -esac +else $as_nop + enable_percent_zt=auto fi @@ -14752,6 +14709,12 @@ esac # Checks for header files. +ac_fn_c_check_header_compile "$LINENO" "assert.h" "ac_cv_header_assert_h" "$ac_includes_default" +if test "x$ac_cv_header_assert_h" = xyes +then : + printf "%s\n" "#define HAVE_ASSERT_H 1" >>confdefs.h + +fi ac_fn_c_check_header_compile "$LINENO" "limits.h" "ac_cv_header_limits_h" "$ac_includes_default" if test "x$ac_cv_header_limits_h" = xyes then : @@ -14906,8 +14869,8 @@ printf %s "checking for an ANSI C-conforming const... " >&6; } if test ${ac_cv_c_const+y} then : printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int @@ -14971,12 +14934,10 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_c_const=yes -else case e in #( - e) ac_cv_c_const=no ;; -esac +else $as_nop + ac_cv_c_const=no fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 printf "%s\n" "$ac_cv_c_const" >&6; } @@ -14990,11 +14951,10 @@ ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" if test "x$ac_cv_type_size_t" = xyes then : -else case e in #( - e) +else $as_nop + printf "%s\n" "#define size_t unsigned int" >>confdefs.h - ;; -esac + fi @@ -15064,10 +15024,9 @@ printf "%s\n" "yes" >&6; } printf "%s\n" "#define HAVE_REALPATH 1" >>confdefs.h -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } ;; -esac +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -15089,22 +15048,16 @@ printf %s "checking for gzopen in -lz... " >&6; } if test ${ac_cv_lib_z_gzopen+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lz $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char gzopen (void); + builtin and then its argument prototype would still apply. 
*/ +char gzopen (); int main (void) { @@ -15116,14 +15069,12 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_z_gzopen=yes -else case e in #( - e) ac_cv_lib_z_gzopen=no ;; -esac +else $as_nop + ac_cv_lib_z_gzopen=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_gzopen" >&5 printf "%s\n" "$ac_cv_lib_z_gzopen" >&6; } @@ -15186,10 +15137,9 @@ if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; };HAVE_LIBBZ2=1; break; -else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } ;; -esac +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext @@ -15223,22 +15173,16 @@ printf %s "checking for readline in -lreadline... " >&6; } if test ${ac_cv_lib_readline_readline+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lreadline $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char readline (void); + builtin and then its argument prototype would still apply. 
*/ +char readline (); int main (void) { @@ -15250,43 +15194,35 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_readline_readline=yes -else case e in #( - e) ac_cv_lib_readline_readline=no ;; -esac +else $as_nop + ac_cv_lib_readline_readline=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } if test "x$ac_cv_lib_readline_readline" = xyes then : LIBREADLINE="-lreadline" -else case e in #( - e) unset ac_cv_lib_readline_readline; +else $as_nop + unset ac_cv_lib_readline_readline; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 printf %s "checking for readline in -lreadline... " >&6; } if test ${ac_cv_lib_readline_readline+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lreadline -ltinfo $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char readline (void); + builtin and then its argument prototype would still apply. 
*/ +char readline (); int main (void) { @@ -15298,43 +15234,35 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_readline_readline=yes -else case e in #( - e) ac_cv_lib_readline_readline=no ;; -esac +else $as_nop + ac_cv_lib_readline_readline=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } if test "x$ac_cv_lib_readline_readline" = xyes then : LIBREADLINE="-ltinfo" -else case e in #( - e) unset ac_cv_lib_readline_readline; +else $as_nop + unset ac_cv_lib_readline_readline; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 printf %s "checking for readline in -lreadline... " >&6; } if test ${ac_cv_lib_readline_readline+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lreadline -lcurses $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char readline (void); + builtin and then its argument prototype would still apply. 
*/ +char readline (); int main (void) { @@ -15346,43 +15274,35 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_readline_readline=yes -else case e in #( - e) ac_cv_lib_readline_readline=no ;; -esac +else $as_nop + ac_cv_lib_readline_readline=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } if test "x$ac_cv_lib_readline_readline" = xyes then : LIBREADLINE="-lcurses" -else case e in #( - e) unset ac_cv_lib_readline_readline; +else $as_nop + unset ac_cv_lib_readline_readline; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 printf %s "checking for readline in -lreadline... " >&6; } if test ${ac_cv_lib_readline_readline+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lreadline -lncurses $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char readline (void); + builtin and then its argument prototype would still apply. 
*/ +char readline (); int main (void) { @@ -15394,43 +15314,35 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_readline_readline=yes -else case e in #( - e) ac_cv_lib_readline_readline=no ;; -esac +else $as_nop + ac_cv_lib_readline_readline=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } if test "x$ac_cv_lib_readline_readline" = xyes then : LIBREADLINE="-lncurses" -else case e in #( - e) unset ac_cv_lib_readline_readline; +else $as_nop + unset ac_cv_lib_readline_readline; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 printf %s "checking for readline in -lreadline... " >&6; } if test ${ac_cv_lib_readline_readline+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lreadline -lncursesw $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char readline (void); + builtin and then its argument prototype would still apply. 
*/ +char readline (); int main (void) { @@ -15442,43 +15354,35 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_readline_readline=yes -else case e in #( - e) ac_cv_lib_readline_readline=no ;; -esac +else $as_nop + ac_cv_lib_readline_readline=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } if test "x$ac_cv_lib_readline_readline" = xyes then : LIBREADLINE="-lncursesw" -else case e in #( - e) unset ac_cv_lib_readline_readline; +else $as_nop + unset ac_cv_lib_readline_readline; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 printf %s "checking for readline in -lreadline... " >&6; } if test ${ac_cv_lib_readline_readline+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-lreadline -ltermcap $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char readline (void); + builtin and then its argument prototype would still apply. 
*/ +char readline (); int main (void) { @@ -15490,38 +15394,30 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_readline_readline=yes -else case e in #( - e) ac_cv_lib_readline_readline=no ;; -esac +else $as_nop + ac_cv_lib_readline_readline=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } if test "x$ac_cv_lib_readline_readline" = xyes then : LIBREADLINE="-ltermcap" -else case e in #( - e) LIBREADLINE="" ;; -esac +else $as_nop + LIBREADLINE="" fi - ;; -esac + fi - ;; -esac + fi - ;; -esac + fi - ;; -esac + fi - ;; -esac + fi @@ -15539,12 +15435,12 @@ fi if test "$enable_pcre2test_libedit" = "yes"; then for ac_header in editline/readline.h edit/readline/readline.h readline.h do : - as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"` + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes" then : cat >>confdefs.h <<_ACEOF -#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1 +#define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF HAVE_LIBEDIT_HEADER=1 @@ -15558,22 +15454,16 @@ printf %s "checking for readline in -ledit... " >&6; } if test ${ac_cv_lib_edit_readline+y} then : printf %s "(cached) " >&6 -else case e in #( - e) ac_check_lib_save_LIBS=$LIBS +else $as_nop + ac_check_lib_save_LIBS=$LIBS LIBS="-ledit $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
- The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char readline (void); + builtin and then its argument prototype would still apply. */ +char readline (); int main (void) { @@ -15585,14 +15475,12 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_edit_readline=yes -else case e in #( - e) ac_cv_lib_edit_readline=no ;; -esac +else $as_nop + ac_cv_lib_edit_readline=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS ;; -esac +LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_edit_readline" >&5 printf "%s\n" "$ac_cv_lib_edit_readline" >&6; } @@ -15685,14 +15573,8 @@ printf %s "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. - The 'extern "C"' is for builds by C++ compilers; - although this is not generally supported in C code supporting it here - has little cost and some practical benefit (sr 110532). */ -#ifdef __cplusplus -extern "C" -#endif -char pthread_join (void); + builtin and then its argument prototype would still apply. */ +char pthread_join (); int main (void) { @@ -15791,8 +15673,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ax_pthread_config+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$ax_pthread_config"; then +else $as_nop + if test -n "$ax_pthread_config"; then ac_cv_prog_ax_pthread_config="$ax_pthread_config" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -15815,8 +15697,7 @@ done IFS=$as_save_IFS test -z "$ac_cv_prog_ax_pthread_config" && ac_cv_prog_ax_pthread_config="no" -fi ;; -esac +fi fi ax_pthread_config=$ac_cv_prog_ax_pthread_config if test -n "$ax_pthread_config"; then @@ -15956,8 +15837,8 @@ printf %s "checking for PTHREAD_PRIO_INHERIT... " >&6; } if test ${ax_cv_PTHREAD_PRIO_INHERIT+y} then : printf %s "(cached) " >&6 -else case e in #( - e) +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -15973,14 +15854,12 @@ _ACEOF if ac_fn_c_try_link "$LINENO" then : ax_cv_PTHREAD_PRIO_INHERIT=yes -else case e in #( - e) ax_cv_PTHREAD_PRIO_INHERIT=no ;; -esac +else $as_nop + ax_cv_PTHREAD_PRIO_INHERIT=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext - ;; -esac + fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_PRIO_INHERIT" >&5 printf "%s\n" "$ax_cv_PTHREAD_PRIO_INHERIT" >&6; } @@ -16005,8 +15884,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_PTHREAD_CC+y} then : printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$PTHREAD_CC"; then +else $as_nop + if test -n "$PTHREAD_CC"; then ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR @@ -16028,8 +15907,7 @@ done done IFS=$as_save_IFS -fi ;; -esac +fi fi PTHREAD_CC=$ac_cv_prog_PTHREAD_CC if test -n "$PTHREAD_CC"; then @@ -16268,16 +16146,16 @@ esac # are m4 variables, assigned above. 
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \ - $NO_UNDEFINED -version-info 13:0:13" + $NO_UNDEFINED -version-info 14:0:14" EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \ - $NO_UNDEFINED -version-info 13:0:13" + $NO_UNDEFINED -version-info 14:0:14" EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \ - $NO_UNDEFINED -version-info 13:0:13" + $NO_UNDEFINED -version-info 14:0:14" EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \ - $NO_UNDEFINED -version-info 3:5:0" + $NO_UNDEFINED -version-info 3:6:0" @@ -16370,8 +16248,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_PKG_CONFIG+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $PKG_CONFIG in +else $as_nop + case $PKG_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. ;; @@ -16396,7 +16274,6 @@ done IFS=$as_save_IFS ;; -esac ;; esac fi PKG_CONFIG=$ac_cv_path_PKG_CONFIG @@ -16419,8 +16296,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $ac_pt_PKG_CONFIG in +else $as_nop + case $ac_pt_PKG_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. ;; @@ -16445,7 +16322,6 @@ done IFS=$as_save_IFS ;; -esac ;; esac fi ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG @@ -16558,8 +16434,8 @@ See the pkg-config man page for more details." "$LINENO" 5 elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "The pkg-config script could not be found or is too old. 
Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full path to pkg-config. @@ -16569,7 +16445,7 @@ and VALGRIND_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details. To get pkg-config, see . -See 'config.log' for more details" "$LINENO" 5; } +See \`config.log' for more details" "$LINENO" 5; } else VALGRIND_CFLAGS=$pkg_cv_VALGRIND_CFLAGS VALGRIND_LIBS=$pkg_cv_VALGRIND_LIBS @@ -16593,8 +16469,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_SHTOOL+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $SHTOOL in +else $as_nop + case $SHTOOL in [\\/]* | ?:[\\/]*) ac_cv_path_SHTOOL="$SHTOOL" # Let the user override the test with a path. ;; @@ -16620,7 +16496,6 @@ IFS=$as_save_IFS test -z "$ac_cv_path_SHTOOL" && ac_cv_path_SHTOOL="false" ;; -esac ;; esac fi SHTOOL=$ac_cv_path_SHTOOL @@ -16652,8 +16527,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_LCOV+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $LCOV in +else $as_nop + case $LCOV in [\\/]* | ?:[\\/]*) ac_cv_path_LCOV="$LCOV" # Let the user override the test with a path. ;; @@ -16679,7 +16554,6 @@ IFS=$as_save_IFS test -z "$ac_cv_path_LCOV" && ac_cv_path_LCOV="false" ;; -esac ;; esac fi LCOV=$ac_cv_path_LCOV @@ -16704,8 +16578,8 @@ printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_GENHTML+y} then : printf %s "(cached) " >&6 -else case e in #( - e) case $GENHTML in +else $as_nop + case $GENHTML in [\\/]* | ?:[\\/]*) ac_cv_path_GENHTML="$GENHTML" # Let the user override the test with a path. 
;; @@ -16731,7 +16605,6 @@ IFS=$as_save_IFS test -z "$ac_cv_path_GENHTML" && ac_cv_path_GENHTML="false" ;; -esac ;; esac fi GENHTML=$ac_cv_path_GENHTML @@ -16790,9 +16663,8 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : pcre2_cc_cv_intel_cet_enabled=yes -else case e in #( - e) pcre2_cc_cv_intel_cet_enabled=no ;; -esac +else $as_nop + pcre2_cc_cv_intel_cet_enabled=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_intel_cet_enabled" >&5 @@ -16836,8 +16708,8 @@ cat >confcache <<\_ACEOF # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # -# 'ac_cv_env_foo' variables (set or unset) will be overridden when -# loading this file, other *unset* 'ac_cv_foo' will be assigned the +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF @@ -16867,14 +16739,14 @@ printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) - # 'set' does not quote correctly, so add quotes: double-quote + # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) - # 'set' quotes correctly as required by POSIX, so do not add quotes. + # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | @@ -16959,12 +16831,6 @@ if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi -# Check whether --enable-year2038 was given. 
-if test ${enable_year2038+y} -then : - enableval=$enable_year2038; -fi - if test -z "${WITH_PCRE2_8_TRUE}" && test -z "${WITH_PCRE2_8_FALSE}"; then as_fn_error $? "conditional \"WITH_PCRE2_8\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -17038,6 +16904,7 @@ cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh +as_nop=: if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh @@ -17046,13 +16913,12 @@ then : # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else case e in #( - e) case `(set -o) 2>/dev/null` in #( +else $as_nop + case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; -esac ;; esac fi @@ -17124,7 +16990,7 @@ IFS=$as_save_IFS ;; esac -# We did not find ourselves, most probably we were run as 'sh COMMAND' +# We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 @@ -17153,6 +17019,7 @@ as_fn_error () } # as_fn_error + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -17192,12 +17059,11 @@ then : { eval $1+=\$2 }' -else case e in #( - e) as_fn_append () +else $as_nop + as_fn_append () { eval $1=\$$1\$2 - } ;; -esac + } fi # as_fn_append # as_fn_arith ARG... @@ -17211,12 +17077,11 @@ then : { as_val=$(( $* )) }' -else case e in #( - e) as_fn_arith () +else $as_nop + as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` - } ;; -esac + } fi # as_fn_arith @@ -17299,9 +17164,9 @@ if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: - # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. - # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. - # In both cases, we have to default to 'cp -pR'. 
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then @@ -17382,12 +17247,10 @@ as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. -as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" -as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. -as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" -as_tr_sh="eval sed '$as_sed_sh'" # deprecated +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 @@ -17402,8 +17265,8 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by PCRE2 $as_me 10.44, which was -generated by GNU Autoconf 2.72. Invocation command line was +This file was extended by PCRE2 $as_me 10.45-RC1, which was +generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -17435,7 +17298,7 @@ _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ -'$as_me' instantiates files and other configuration actions +\`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. 
@@ -17470,11 +17333,11 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -PCRE2 config.status 10.44 -configured by $0, generated by GNU Autoconf 2.72, +PCRE2 config.status 10.45-RC1 +configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" -Copyright (C) 2023 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." @@ -17536,8 +17399,8 @@ do ac_need_defaults=false;; --he | --h) # Conflict between --help and --header - as_fn_error $? "ambiguous option: '$1' -Try '$0 --help' for more information.";; + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; --help | --hel | -h ) printf "%s\n" "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ @@ -17545,8 +17408,8 @@ Try '$0 --help' for more information.";; ac_cs_silent=: ;; # This is an error. - -*) as_fn_error $? "unrecognized option: '$1' -Try '$0 --help' for more information." ;; + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; @@ -17899,7 +17762,7 @@ do "script-chmod") CONFIG_COMMANDS="$CONFIG_COMMANDS script-chmod" ;; "delete-old-chartables") CONFIG_COMMANDS="$CONFIG_COMMANDS delete-old-chartables" ;; - *) as_fn_error $? "invalid argument: '$ac_config_target'" "$LINENO" 5;; + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done @@ -17919,7 +17782,7 @@ fi # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. 
# Note that there is a small window in which the directory will not be cleaned: -# after its creation but before its name has been assigned to '$tmp'. +# after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= @@ -17943,7 +17806,7 @@ ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. -# This happens for instance with './config.status config.h'. +# This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then @@ -18101,13 +17964,13 @@ fi # test -n "$CONFIG_FILES" # Set up the scripts for CONFIG_HEADERS section. # No need to generate them if there are no CONFIG_HEADERS. -# This happens for instance with './config.status Makefile'. +# This happens for instance with `./config.status Makefile'. if test -n "$CONFIG_HEADERS"; then cat >"$ac_tmp/defines.awk" <<\_ACAWK || BEGIN { _ACEOF -# Transform confdefs.h into an awk script 'defines.awk', embedded as +# Transform confdefs.h into an awk script `defines.awk', embedded as # here-document in config.status, that substitutes the proper values into # config.h.in to produce config.h. @@ -18217,7 +18080,7 @@ do esac case $ac_mode$ac_tag in :[FHL]*:*);; - :L* | :C*:*) as_fn_error $? "invalid tag '$ac_tag'" "$LINENO" 5;; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac @@ -18239,19 +18102,19 @@ do -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, - # because $ac_f cannot contain ':'. + # because $ac_f cannot contain `:'. 
test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || - as_fn_error 1 "cannot find input file: '$ac_f'" "$LINENO" 5;; + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done - # Let's still pretend it is 'configure' which instantiates (i.e., don't + # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` @@ -18384,7 +18247,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 esac _ACEOF -# Neutralize VPATH when '$srcdir' = '.'. +# Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 @@ -18415,9 +18278,9 @@ test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable 'datarootdir' + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 -printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable 'datarootdir' +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" @@ -18572,15 +18435,15 @@ printf "%s\n" X/"$am_mf" | (exit $ac_status); } || am_rc=$? 
done if test $am_rc -ne 0; then - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "Something went wrong bootstrapping makefile fragments for automatic dependency tracking. If GNU make was not used, consider re-running the configure script with MAKE=\"gmake\" (or whatever is necessary). You can also try re-running configure with the '--disable-dependency-tracking' option to at least be able to build the package (albeit without support for automatic dependency tracking). -See 'config.log' for more details" "$LINENO" 5; } +See \`config.log' for more details" "$LINENO" 5; } fi { am_dirpart=; unset am_dirpart;} { am_filepart=; unset am_filepart;} @@ -18609,13 +18472,13 @@ See 'config.log' for more details" "$LINENO" 5; } # Provide generalized library-building support services. # Written by Gordon Matzigkeit, 1996 -# Copyright (C) 2024 Free Software Foundation, Inc. +# Copyright (C) 2014 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # GNU Libtool is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or +# the Free Software Foundation; either version 2 of of the License, or # (at your option) any later version. # # As a special exception to the GNU General Public License, if you @@ -18999,7 +18862,7 @@ hardcode_direct=$hardcode_direct # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary and the resulting library dependency is -# "absolute",i.e. 
impossible to change by setting \$shlibpath_var if the +# "absolute",i.e impossible to change by setting \$shlibpath_var if the # library is relocated. hardcode_direct_absolute=$hardcode_direct_absolute diff --git a/configure.ac b/configure.ac index 6091ea4..6b28f38 100644 --- a/configure.ac +++ b/configure.ac @@ -9,23 +9,23 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [44]) -m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2024-06-07]) +m4_define(pcre2_minor, [45]) +m4_define(pcre2_prerelease, [-RC1]) +m4_define(pcre2_date, [2024-12-27]) # Libtool shared library interface versions (current:revision:age) -m4_define(libpcre2_8_version, [13:0:13]) -m4_define(libpcre2_16_version, [13:0:13]) -m4_define(libpcre2_32_version, [13:0:13]) -m4_define(libpcre2_posix_version, [3:5:0]) +m4_define(libpcre2_8_version, [14:0:14]) +m4_define(libpcre2_16_version, [14:0:14]) +m4_define(libpcre2_32_version, [14:0:14]) +m4_define(libpcre2_posix_version, [3:6:0]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. 
-AC_PREREQ([2.62]) +AC_PREREQ([2.60]) AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2]) AC_CONFIG_SRCDIR([src/pcre2.h.in]) -AM_INIT_AUTOMAKE([dist-bzip2 dist-zip]) +AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_CONFIG_HEADERS(src/config.h) @@ -73,6 +73,40 @@ AC_SYS_LARGEFILE PCRE2_VISIBILITY +# Check for Clang __attribute__((uninitialized)) feature + +AC_MSG_CHECKING([for __attribute__((uninitialized))]) +AC_LANG_PUSH([C]) +tmp_CFLAGS=$CFLAGS +if test $WORKING_WERROR -eq 1; then + CFLAGS="$CFLAGS -Werror" +fi +AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, + [[char buf[128] __attribute__((uninitialized));(void)buf]])], + [pcre2_cc_cv_attribute_uninitialized=yes], + [pcre2_cc_cv_attribute_uninitialized=no]) +AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized]) +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler + supports __attribute__((uninitialized))]) +fi +CFLAGS=$tmp_CFLAGS +AC_LANG_POP([C]) + +# Check for the assume() builtin + +AC_MSG_CHECKING([for __assume()]) +AC_LANG_PUSH([C]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[__assume(1)]])], + [pcre2_cc_cv_builtin_assume=yes], + [pcre2_cc_cv_builtin_assume=no]) +AC_MSG_RESULT([$pcre2_cc_cv_builtin_assume]) +if test "$pcre2_cc_cv_builtin_assume" = yes; then + AC_DEFINE([HAVE_BUILTIN_ASSUME], 1, + [Define this if your compiler provides __assume()]) +fi +AC_LANG_POP([C]) + # Check for the mul_overflow() builtin AC_MSG_CHECKING([for __builtin_mul_overflow()]) @@ -95,22 +129,18 @@ if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then fi AC_LANG_POP([C]) -# Check for Clang __attribute__((uninitialized)) feature +# Check for the unreachable() builtin -AC_MSG_CHECKING([for __attribute__((uninitialized))]) +AC_MSG_CHECKING([for __builtin_unreachable()]) AC_LANG_PUSH([C]) -tmp_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -Werror" -AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, - [[char 
buf[128] __attribute__((uninitialized));(void)buf]])], - [pcre2_cc_cv_attribute_uninitialized=yes], - [pcre2_cc_cv_attribute_uninitialized=no]) -AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized]) -if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then - AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler - supports __attribute__((uninitialized))]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])], + [pcre2_cc_cv_builtin_unreachable=yes], + [pcre2_cc_cv_builtin_unreachable=no]) +AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable]) +if test "$pcre2_cc_cv_builtin_unreachable" = yes; then + AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1, + [Define this if your compiler provides __builtin_unreachable()]) fi -CFLAGS=$tmp_CFLAGS AC_LANG_POP([C]) # Versioning @@ -191,7 +221,7 @@ if test "$enable_jit" = "auto"; then CPPFLAGS=-I$srcdir AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ #define SLJIT_CONFIG_AUTO 1 - #include "src/sljit/sljitConfigCPU.h" + #include "deps/sljit/sljit_src/sljitConfigCPU.h" #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #error unsupported #endif]])], enable_jit=yes, enable_jit=no) @@ -285,7 +315,7 @@ AC_ARG_ENABLE(never-backslash-C, # Handle --enable-ebcdic AC_ARG_ENABLE(ebcdic, AS_HELP_STRING([--enable-ebcdic], - [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), + [assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), , enable_ebcdic=no) # Handle --enable-ebcdic-nl25 @@ -521,7 +551,7 @@ HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make sure both macros are undefined; an emulation function will then be used. */]) # Checks for header files. 
-AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h) +AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h) AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1]) AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) diff --git a/src/sljit/allocator_src/sljitExecAllocatorApple.c b/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c similarity index 100% rename from src/sljit/allocator_src/sljitExecAllocatorApple.c rename to deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c diff --git a/src/sljit/allocator_src/sljitExecAllocatorCore.c b/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c similarity index 100% rename from src/sljit/allocator_src/sljitExecAllocatorCore.c rename to deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c diff --git a/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c b/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c similarity index 100% rename from src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c rename to deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c diff --git a/src/sljit/allocator_src/sljitExecAllocatorPosix.c b/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c similarity index 100% rename from src/sljit/allocator_src/sljitExecAllocatorPosix.c rename to deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c diff --git a/src/sljit/allocator_src/sljitExecAllocatorWindows.c b/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c similarity index 100% rename from src/sljit/allocator_src/sljitExecAllocatorWindows.c rename to deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c diff --git a/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c b/deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c similarity index 100% rename from src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c rename to deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c diff --git 
a/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c b/deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c similarity index 100% rename from src/sljit/allocator_src/sljitProtExecAllocatorPosix.c rename to deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c diff --git a/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c b/deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c similarity index 100% rename from src/sljit/allocator_src/sljitWXExecAllocatorPosix.c rename to deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c diff --git a/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c b/deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c similarity index 100% rename from src/sljit/allocator_src/sljitWXExecAllocatorWindows.c rename to deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c diff --git a/src/sljit/sljitConfig.h b/deps/sljit/sljit_src/sljitConfig.h similarity index 93% rename from src/sljit/sljitConfig.h rename to deps/sljit/sljit_src/sljitConfig.h index 364c8bb..993f4fe 100644 --- a/src/sljit/sljitConfig.h +++ b/deps/sljit/sljit_src/sljitConfig.h @@ -29,7 +29,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif /* __cplusplus */ /* This file contains the basic configuration options for the SLJIT compiler @@ -47,19 +47,19 @@ extern "C" { #ifndef SLJIT_UTIL_STACK /* Enabled by default */ #define SLJIT_UTIL_STACK 1 -#endif +#endif /* SLJIT_UTIL_STACK */ /* Uses user provided allocator to allocate the stack (see SLJIT_UTIL_STACK) */ #ifndef SLJIT_UTIL_SIMPLE_STACK_ALLOCATION /* Disabled by default */ #define SLJIT_UTIL_SIMPLE_STACK_ALLOCATION 0 -#endif +#endif /* SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */ /* Single threaded application. Does not require any locks. */ #ifndef SLJIT_SINGLE_THREADED /* Disabled by default. 
*/ #define SLJIT_SINGLE_THREADED 0 -#endif +#endif /* SLJIT_SINGLE_THREADED */ /* --------------------------------------------------------------------- */ /* Configuration */ @@ -70,7 +70,7 @@ extern "C" { #ifndef SLJIT_STD_MACROS_DEFINED /* Disabled by default. */ #define SLJIT_STD_MACROS_DEFINED 0 -#endif +#endif /* SLJIT_STD_MACROS_DEFINED */ /* Executable code allocation: If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should @@ -93,7 +93,7 @@ extern "C" { #ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR /* Disabled by default. */ #define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0 -#endif +#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */ /* When SLJIT_WX_EXECUTABLE_ALLOCATOR is enabled SLJIT uses an allocator which does not set writable and executable permission @@ -104,7 +104,7 @@ extern "C" { #ifndef SLJIT_WX_EXECUTABLE_ALLOCATOR /* Disabled by default. */ #define SLJIT_WX_EXECUTABLE_ALLOCATOR 0 -#endif +#endif /* SLJIT_WX_EXECUTABLE_ALLOCATOR */ #endif /* !SLJIT_EXECUTABLE_ALLOCATOR */ @@ -112,19 +112,19 @@ extern "C" { #ifndef SLJIT_ARGUMENT_CHECKS /* Disabled by default */ #define SLJIT_ARGUMENT_CHECKS 0 -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ /* Debug checks (assertions, etc.). */ #ifndef SLJIT_DEBUG /* Enabled by default */ #define SLJIT_DEBUG 1 -#endif +#endif /* SLJIT_DEBUG */ /* Verbose operations. 
*/ #ifndef SLJIT_VERBOSE /* Enabled by default */ #define SLJIT_VERBOSE 1 -#endif +#endif /* SLJIT_VERBOSE */ /* SLJIT_IS_FPU_AVAILABLE @@ -137,6 +137,6 @@ extern "C" { #ifdef __cplusplus } /* extern "C" */ -#endif +#endif /* __cplusplus */ #endif /* SLJIT_CONFIG_H_ */ diff --git a/src/sljit/sljitConfigCPU.h b/deps/sljit/sljit_src/sljitConfigCPU.h similarity index 98% rename from src/sljit/sljitConfigCPU.h rename to deps/sljit/sljit_src/sljitConfigCPU.h index 2720bda..dcf88ef 100644 --- a/src/sljit/sljitConfigCPU.h +++ b/deps/sljit/sljit_src/sljitConfigCPU.h @@ -169,7 +169,7 @@ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) #define SLJIT_CONFIG_ARM_32 1 -#endif +#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 || SLJIT_CONFIG_ARM_THUMB2 */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_CONFIG_X86 1 diff --git a/src/sljit/sljitConfigInternal.h b/deps/sljit/sljit_src/sljitConfigInternal.h similarity index 89% rename from src/sljit/sljitConfigInternal.h rename to deps/sljit/sljit_src/sljitConfigInternal.h index de06dd8..3ae944e 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/deps/sljit/sljit_src/sljitConfigInternal.h @@ -27,20 +27,6 @@ #ifndef SLJIT_CONFIG_INTERNAL_H_ #define SLJIT_CONFIG_INTERNAL_H_ -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE))) -#include <stdio.h> -#endif - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG \ - && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS))) -#include <stdlib.h> -#endif - -#ifdef __cplusplus -extern "C" { -#endif - /* SLJIT defines the following architecture dependent types and macros: @@ -64,16 +50,26 @@ extern "C" { SLJIT_MASKED_SHIFT : all word shifts are always masked SLJIT_MASKED_SHIFT32 : all 32 bit
shifts are always masked SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information + SLJIT_UPPER_BITS_IGNORED : 32 bit operations ignores the upper bits of source registers + SLJIT_UPPER_BITS_ZERO_EXTENDED : 32 bit operations clears the upper bits of destination registers + SLJIT_UPPER_BITS_SIGN_EXTENDED : 32 bit operations replicates the sign bit in the upper bits of destination registers + SLJIT_UPPER_BITS_PRESERVED : 32 bit operations preserves the upper bits of destination registers Constants: SLJIT_NUMBER_OF_REGISTERS : number of available registers SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available scratch floating point registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available saved floating point registers + SLJIT_NUMBER_OF_VECTOR_REGISTERS : number of available vector registers + SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS : number of available scratch vector registers + SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS : number of available saved vector registers SLJIT_NUMBER_OF_TEMPORARY_REGISTERS : number of available temporary registers SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS : number of available temporary floating point registers + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS : number of available temporary vector registers + SLJIT_SEPARATE_VECTOR_REGISTERS : if this macro is defined, the vector registers do not + overlap with floating point registers SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index SLJIT_F32_SHIFT : the shift required to apply when accessing a single precision 
floating point array by index @@ -98,16 +94,33 @@ extern "C" { SLJIT_TMP_R(i) : accessing temporary registers SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers SLJIT_TMP_FR(i) : accessing temporary floating point registers + SLJIT_TMP_VR0 .. VR9 : accessing temporary vector registers + SLJIT_TMP_VR(i) : accessing temporary vector registers SLJIT_TMP_DEST_REG : a temporary register for results SLJIT_TMP_MEM_REG : a temporary base register for accessing memory (can be the same as SLJIT_TMP_DEST_REG) SLJIT_TMP_DEST_FREG : a temporary register for float results + SLJIT_TMP_DEST_VREG : a temporary register for vector results SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper) SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit floating point register when SLJIT_HAS_F64_AS_F32_PAIR returns non-zero */ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE))) +#include <stdio.h> +#endif + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG \ + && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS))) +#include <stdlib.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + /***********************************************************/ /* Intel Control-flow Enforcement Technology (CET) spport.
*/ /***********************************************************/ @@ -285,7 +298,7 @@ extern "C" { #elif defined(_WIN32) #define SLJIT_CACHE_FLUSH(from, to) \ - FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (size_t)((char*)(to) - (char*)(from))) #elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) @@ -553,7 +566,7 @@ determine the next executed instruction after return. */ #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size); SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +/* Note: sljitLir.h also defines sljit_free_unused_memory_exec() function. */ #define SLJIT_BUILTIN_MALLOC_EXEC(size, exec_allocator_data) sljit_malloc_exec(size) #define SLJIT_BUILTIN_FREE_EXEC(ptr, exec_allocator_data) sljit_free_exec(ptr) @@ -591,10 +604,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 #define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 #define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 -#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw)) +#define SLJIT_LOCALS_OFFSET_BASE (8 * (sljit_s32)sizeof(sljit_sw)) #define SLJIT_PREF_SHIFT_REG SLJIT_R2 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 +#define SLJIT_UPPER_BITS_IGNORED 1 +#define SLJIT_UPPER_BITS_ZERO_EXTENDED 1 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -609,7 +624,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #else /* _WIN64 */ #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 -#define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw)) +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) #endif /* !_WIN64 */ #define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 #define 
SLJIT_TMP_MEM_REG SLJIT_TMP_R0 @@ -617,6 +632,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_PREF_SHIFT_REG SLJIT_R3 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 +#define SLJIT_UPPER_BITS_IGNORED 1 +#define SLJIT_UPPER_BITS_ZERO_EXTENDED 1 #elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) @@ -645,6 +662,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 +#define SLJIT_UPPER_BITS_IGNORED 1 +#define SLJIT_UPPER_BITS_ZERO_EXTENDED 1 #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) @@ -665,6 +684,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #else #define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw)) #endif /* SLJIT_CONFIG_PPC_64 || _AIX */ +#define SLJIT_UPPER_BITS_IGNORED 1 #elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) @@ -686,6 +706,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 +#define SLJIT_UPPER_BITS_SIGN_EXTENDED 1 #elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) @@ -695,12 +716,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_SEPARATE_VECTOR_REGISTERS 1 +#define SLJIT_NUMBER_OF_VECTOR_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS 0 +#define SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS 2 #define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 #define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 #define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_TMP_DEST_VREG SLJIT_TMP_VR0 #define SLJIT_LOCALS_OFFSET_BASE 0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 +#define SLJIT_UPPER_BITS_IGNORED 1 +#define 
SLJIT_UPPER_BITS_SIGN_EXTENDED 1 #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) @@ -736,6 +764,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE #define SLJIT_MASKED_SHIFT 1 +#define SLJIT_UPPER_BITS_IGNORED 1 +#define SLJIT_UPPER_BITS_PRESERVED 1 #elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) @@ -751,6 +781,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_LOCALS_OFFSET_BASE 0 #define SLJIT_MASKED_SHIFT 1 #define SLJIT_MASKED_SHIFT32 1 +#define SLJIT_UPPER_BITS_SIGN_EXTENDED 1 #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) @@ -768,6 +799,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #endif +#if !(defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) +#define SLJIT_NUMBER_OF_VECTOR_REGISTERS (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS (SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) +#define SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS (SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS) +#define SLJIT_TMP_DEST_VREG (SLJIT_TMP_DEST_FREG) +#endif /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ + #define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE) #define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ @@ -776,12 +814,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) +#define SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS \ + (SLJIT_NUMBER_OF_VECTOR_REGISTERS - SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS) + +#if (defined SLJIT_UPPER_BITS_ZERO_EXTENDED && SLJIT_UPPER_BITS_ZERO_EXTENDED) \ + + (defined SLJIT_UPPER_BITS_SIGN_EXTENDED && SLJIT_UPPER_BITS_SIGN_EXTENDED) \ + + (defined SLJIT_UPPER_BITS_PRESERVED && SLJIT_UPPER_BITS_PRESERVED) > 1 +#error "Invalid upper bits 
defintion" +#endif + +#if (defined SLJIT_UPPER_BITS_PRESERVED && SLJIT_UPPER_BITS_PRESERVED) \ + && !(defined SLJIT_UPPER_BITS_IGNORED && SLJIT_UPPER_BITS_IGNORED) +#error "Upper bits preserved requires bits ignored" +#endif + /**********************************/ /* Temporary register management. */ /**********************************/ #define SLJIT_TMP_REGISTER_BASE (SLJIT_NUMBER_OF_REGISTERS + 2) #define SLJIT_TMP_FREGISTER_BASE (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define SLJIT_TMP_VREGISTER_BASE (SLJIT_NUMBER_OF_VECTOR_REGISTERS + 1) /* WARNING: Accessing temporary registers is not recommended, because they are also used by the JIT compiler for various computations. Using them @@ -815,6 +868,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); #define SLJIT_TMP_FR9 (SLJIT_TMP_FREGISTER_BASE + 9) #define SLJIT_TMP_FR(i) (SLJIT_TMP_FREGISTER_BASE + (i)) +#define SLJIT_TMP_VR0 (SLJIT_TMP_VREGISTER_BASE + 0) +#define SLJIT_TMP_VR1 (SLJIT_TMP_VREGISTER_BASE + 1) +#define SLJIT_TMP_VR2 (SLJIT_TMP_VREGISTER_BASE + 2) +#define SLJIT_TMP_VR3 (SLJIT_TMP_VREGISTER_BASE + 3) +#define SLJIT_TMP_VR4 (SLJIT_TMP_VREGISTER_BASE + 4) +#define SLJIT_TMP_VR5 (SLJIT_TMP_VREGISTER_BASE + 5) +#define SLJIT_TMP_VR6 (SLJIT_TMP_VREGISTER_BASE + 6) +#define SLJIT_TMP_VR7 (SLJIT_TMP_VREGISTER_BASE + 7) +#define SLJIT_TMP_VR8 (SLJIT_TMP_VREGISTER_BASE + 8) +#define SLJIT_TMP_VR9 (SLJIT_TMP_VREGISTER_BASE + 9) +#define SLJIT_TMP_VR(i) (SLJIT_TMP_VREGISTER_BASE + (i)) + /********************************/ /* CPU status flags management. */ /********************************/ diff --git a/src/sljit/sljitLir.c b/deps/sljit/sljit_src/sljitLir.c similarity index 82% rename from src/sljit/sljitLir.c rename to deps/sljit/sljit_src/sljitLir.c index 2dca17c..6b2d556 100644 --- a/src/sljit/sljitLir.c +++ b/deps/sljit/sljit_src/sljitLir.c @@ -96,9 +96,10 @@ /* All variable flags are even. 
*/ #define VARIABLE_FLAG_MASK (0x3e << VARIABLE_FLAG_SHIFT) #define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) +#define GET_FLAG_TYPE_MASK(op) (((op) >> VARIABLE_FLAG_SHIFT) & 0x3e) #define GET_OPCODE(op) \ - ((op) & ~(SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + ((op) & 0xff) #define HAS_FLAGS(op) \ ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) @@ -118,9 +119,9 @@ #if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) #define ABUF_SIZE 2048 -#else +#else /* !SLJIT_32BIT_ARCHITECTURE */ #define ABUF_SIZE 4096 -#endif +#endif /* SLJIT_32BIT_ARCHITECTURE */ /* Parameter parsing. */ #define REG_MASK 0x7f @@ -139,7 +140,10 @@ #define REG_PAIR_SECOND(reg) ((reg) >> 8) /* Mask for sljit_emit_enter. */ -#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3) +#define ENTER_GET_REGS(regs) ((regs) & 0xff) +#define ENTER_GET_FLOAT_REGS(regs) (((regs) >> 8) & 0xff) +#define ENTER_GET_VECTOR_REGS(regs) (((regs) >> 16) & 0xff) +#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3) /* Getters for simd operations, which returns with log2(size). 
*/ #define SLJIT_SIMD_GET_OPCODE(type) ((type) & 0xff) @@ -334,19 +338,19 @@ #if defined(__NetBSD__) #include "allocator_src/sljitProtExecAllocatorNetBSD.c" -#else +#else /* !__NetBSD__ */ #include "allocator_src/sljitProtExecAllocatorPosix.c" -#endif +#endif /* __NetBSD__ */ #elif (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR) #if defined(_WIN32) #include "allocator_src/sljitWXExecAllocatorWindows.c" -#else +#else /* !_WIN32 */ #include "allocator_src/sljitWXExecAllocatorPosix.c" -#endif +#endif /* _WIN32 */ -#else +#else /* !SLJIT_PROT_EXECUTABLE_ALLOCATOR && !SLJIT_WX_EXECUTABLE_ALLOCATOR */ #if defined(_WIN32) #include "allocator_src/sljitExecAllocatorWindows.c" @@ -354,25 +358,25 @@ #include "allocator_src/sljitExecAllocatorApple.c" #elif defined(__FreeBSD__) #include "allocator_src/sljitExecAllocatorFreeBSD.c" -#else +#else /* !_WIN32 && !__APPLE__ && !__FreeBSD__ */ #include "allocator_src/sljitExecAllocatorPosix.c" -#endif +#endif /* _WIN32 */ -#endif +#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */ #else /* !SLJIT_EXECUTABLE_ALLOCATOR */ #ifndef SLJIT_UPDATE_WX_FLAGS #define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) -#endif +#endif /* SLJIT_UPDATE_WX_FLAGS */ #endif /* SLJIT_EXECUTABLE_ALLOCATOR */ #if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) #define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset)) -#else +#else /* !SLJIT_PROT_EXECUTABLE_ALLOCATOR */ #define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr)) -#endif +#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */ /* Argument checking features. */ @@ -434,7 +438,7 @@ #define CHECK_PTR(x) x #define CHECK_REG_INDEX(x) x -#else +#else /* !SLJIT_ARGUMENT_CHECKS && !SLJIT_DEBUG && !SLJIT_VERBOSE */ /* Arguments are not checked. */ #define CHECK(x) @@ -452,7 +456,7 @@ static sljit_s32 compiler_initialized = 0; /* A thread safe initialization. 
*/ static void init_compiler(void); -#endif +#endif /* SLJIT_CONFIG_X86 */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data) { @@ -501,6 +505,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->saveds = -1; compiler->fscratches = -1; compiler->fsaveds = -1; +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + compiler->vscratches = -1; + compiler->vsaveds = -1; +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS || SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ compiler->local_size = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -526,17 +536,25 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->last_flags = 0; + SLJIT_ASSERT(compiler->last_flags == 0 && compiler->logical_local_size == 0); compiler->last_return = -1; - compiler->logical_local_size = 0; #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +#if !(defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + compiler->real_fscratches = -1; + compiler->real_fsaveds = -1; +#endif /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ + SLJIT_ASSERT(compiler->skip_checks == 0); +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ + #if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) if (!compiler_initialized) { init_compiler(); compiler_initialized = 1; } -#endif +#endif /* SLJIT_NEEDS_COMPILER_INIT */ return compiler; } @@ -564,7 +582,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) SLJIT_FREE(compiler->cpool, 
allocator_data); -#endif +#endif /* SLJIT_CONFIG_ARM_V6 */ SLJIT_FREE(compiler, allocator_data); } @@ -607,14 +625,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *com #if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) compiler->status_flags_state = current_flags; -#endif +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_flags = 0; if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_32 | SLJIT_SET_Z)); } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ } /* --------------------------------------------------------------------- */ @@ -667,11 +685,11 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compile if (size <= 0 || size > 128) return NULL; size = (size + 7) & ~7; -#else +#else /* !SLJIT_64BIT_ARCHITECTURE */ if (size <= 0 || size > 64) return NULL; size = (size + 3) & ~3; -#endif +#endif /* SLJIT_64BIT_ARCHITECTURE */ return ensure_abuf(compiler, (sljit_uw)size); } @@ -752,40 +770,55 @@ static SLJIT_INLINE sljit_uw sljit_get_next_min(sljit_uw next_label_size, #endif /* !SLJIT_CONFIG_X86 */ -static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +#if !(defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + +static void update_float_register_count(struct sljit_compiler *compiler, sljit_s32 scratches, sljit_s32 saveds) { - SLJIT_UNUSED_ARG(args); - SLJIT_UNUSED_ARG(local_size); + sljit_s32 vscratches = ENTER_GET_VECTOR_REGS(scratches); + sljit_s32 vsaveds = ENTER_GET_VECTOR_REGS(saveds); - compiler->options = options; - compiler->scratches = scratches; - compiler->saveds = saveds; - compiler->fscratches = fscratches; - 
compiler->fsaveds = fsaveds; -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->last_return = args & SLJIT_ARG_MASK; - compiler->logical_local_size = local_size; -#endif + if (compiler->fscratches < vscratches) + compiler->fscratches = vscratches; + + if (compiler->fsaveds < vsaveds) + compiler->fsaveds = vsaveds; + + if (compiler->fsaveds + compiler->fscratches > SLJIT_NUMBER_OF_FLOAT_REGISTERS) + compiler->fscratches = SLJIT_NUMBER_OF_FLOAT_REGISTERS - compiler->fsaveds; } -static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +#endif /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ + +static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(local_size); compiler->options = options; - compiler->scratches = scratches; - compiler->saveds = saveds; - compiler->fscratches = fscratches; - compiler->fsaveds = fsaveds; + compiler->scratches = ENTER_GET_REGS(scratches); + compiler->saveds = ENTER_GET_REGS(saveds); + /* These members may be copied to real_* members below. 
*/ + compiler->fscratches = ENTER_GET_FLOAT_REGS(scratches); + compiler->fsaveds = ENTER_GET_FLOAT_REGS(saveds); +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + compiler->vscratches = ENTER_GET_VECTOR_REGS(scratches); + compiler->vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + compiler->real_fscratches = compiler->fscratches; + compiler->real_fsaveds = compiler->fsaveds; + compiler->vscratches = ENTER_GET_VECTOR_REGS(scratches); + compiler->vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ + update_float_register_count(compiler, scratches, saveds); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ } static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) @@ -900,9 +933,9 @@ static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratch #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8) -#else +#else /* !SLJIT_CONFIG_X86_32 */ #define CHECK_IF_VIRTUAL_REGISTER(p) 0 -#endif +#endif /* SLJIT_CONFIG_X86_32 */ static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { @@ -994,17 +1027,47 @@ static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, s return function_check_src_mem(compiler, p, i); } +static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type); + +#define FUNCTION_CHECK_IS_VREG(vr, type) \ + function_check_is_vreg(compiler, (vr), (type)) + +#define FUNCTION_VCHECK(p, i, type) \ + 
CHECK_ARGUMENT(function_vcheck(compiler, (p), (i), (type))) + +static sljit_s32 function_vcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 type) +{ + if (compiler->scratches == -1) + return 0; + + if (FUNCTION_CHECK_IS_VREG(p, type)) + return (i == 0); + + return function_check_src_mem(compiler, p, i); +} + #else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */ + #define FUNCTION_CHECK_IS_FREG(fr, is_32) \ function_check_is_freg(compiler, (fr)) static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr) { + sljit_s32 fscratches, fsaveds; + if (compiler->scratches == -1) return 0; - return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) - || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ + fscratches = compiler->real_fscratches; + fsaveds = compiler->real_fsaveds; +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + fscratches)) + || (fr > (SLJIT_FS0 - fsaveds) && fr <= SLJIT_FS0) || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); } @@ -1016,9 +1079,34 @@ static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, s if (compiler->scratches == -1) return 0; - if ((p >= SLJIT_FR0 && p < (SLJIT_FR0 + compiler->fscratches)) - || (p > (SLJIT_FS0 - compiler->fsaveds) && p <= SLJIT_FS0) - || (p >= SLJIT_TMP_FREGISTER_BASE && p < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))) + if (function_check_is_freg(compiler, p)) + return (i == 0); + + return function_check_src_mem(compiler, p, i); +} + +#define FUNCTION_CHECK_IS_VREG(vr, type) \ + function_check_is_vreg(compiler, (vr)) + +static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, 
sljit_s32 vr) +{ + if (compiler->scratches == -1) + return 0; + + return (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches)) + || (vr > (SLJIT_VS0 - compiler->vsaveds) && vr <= SLJIT_VS0) + || (vr >= SLJIT_TMP_VREGISTER_BASE && vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS)); +} + +#define FUNCTION_VCHECK(p, i, type) \ + CHECK_ARGUMENT(function_vcheck(compiler, (p), (i))) + +static sljit_s32 function_vcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1) + return 0; + + if (function_check_is_vreg(compiler, p)) return (i == 0); return function_check_src_mem(compiler, p, i); @@ -1039,15 +1127,15 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp #ifdef _WIN64 #ifdef __GNUC__ # define SLJIT_PRINT_D "ll" -#else +#else /* !__GNUC__ */ # define SLJIT_PRINT_D "I64" -#endif -#else +#endif /* __GNUC__ */ +#else /* !_WIN64 */ # define SLJIT_PRINT_D "l" -#endif -#else +#endif /* _WIN64 */ +#else /* !SLJIT_64BIT_ARCHITECTURE */ # define SLJIT_PRINT_D "" -#endif +#endif /* SLJIT_64BIT_ARCHITECTURE */ static void sljit_verbose_reg(struct sljit_compiler *compiler, sljit_s32 r) { @@ -1079,57 +1167,73 @@ static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r) fprintf(compiler->verbose, "ft%d", r - SLJIT_TMP_FREGISTER_BASE); } +static void sljit_verbose_vreg(struct sljit_compiler *compiler, sljit_s32 r) +{ +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (r >= SLJIT_F64_SECOND(SLJIT_VR0)) { + fprintf(compiler->verbose, "^"); + r -= SLJIT_F64_SECOND(0); + } +#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */ + + if (r < (SLJIT_VR0 + compiler->vscratches)) + fprintf(compiler->verbose, "vr%d", r - SLJIT_VR0); + else if (r < SLJIT_TMP_VREGISTER_BASE) + fprintf(compiler->verbose, "vs%d", SLJIT_NUMBER_OF_VECTOR_REGISTERS - r); + else + fprintf(compiler->verbose, "vt%d", r 
- SLJIT_TMP_VREGISTER_BASE); +} + +static void sljit_verbose_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (!(p & REG_MASK)) { + fprintf(compiler->verbose, "[%" SLJIT_PRINT_D "d]", i); + return; + } + + fputc('[', compiler->verbose); + sljit_verbose_reg(compiler, (p) & REG_MASK); + if (p & OFFS_REG_MASK) { + fprintf(compiler->verbose, " + "); + sljit_verbose_reg(compiler, OFFS_REG(p)); + if (i) + fprintf(compiler->verbose, " * %d", 1 << (i)); + } else if (i) + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); + fputc(']', compiler->verbose); +} + static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { - if ((p) == SLJIT_IMM) - fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); - else if ((p) & SLJIT_MEM) { - if ((p) & REG_MASK) { - fputc('[', compiler->verbose); - sljit_verbose_reg(compiler, (p) & REG_MASK); - if ((p) & OFFS_REG_MASK) { - fprintf(compiler->verbose, " + "); - sljit_verbose_reg(compiler, OFFS_REG(p)); - if (i) - fprintf(compiler->verbose, " * %d", 1 << (i)); - } - else if (i) - fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); - fputc(']', compiler->verbose); - } - else - fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); - } else + if (p == SLJIT_IMM) + fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", i); + else if (p & SLJIT_MEM) + sljit_verbose_mem(compiler, p, i); + else sljit_verbose_reg(compiler, p); } static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { - if ((p) & SLJIT_MEM) { - if ((p) & REG_MASK) { - fputc('[', compiler->verbose); - sljit_verbose_reg(compiler, (p) & REG_MASK); - if ((p) & OFFS_REG_MASK) { - fprintf(compiler->verbose, " + "); - sljit_verbose_reg(compiler, OFFS_REG(p)); - if (i) - fprintf(compiler->verbose, "%d", 1 << (i)); - } - else if (i) - fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); - fputc(']', compiler->verbose); - } - else - fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D 
"d]", (i)); - } + if (p & SLJIT_MEM) + sljit_verbose_mem(compiler, p, i); else sljit_verbose_freg(compiler, p); } +static void sljit_verbose_vparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (p & SLJIT_MEM) + sljit_verbose_mem(compiler, p, i); + else + sljit_verbose_vreg(compiler, p); +} + static const char* op0_names[] = { "breakpoint", "nop", "lmul.uw", "lmul.sw", "divmod.u", "divmod.s", "div.u", "div.s", - "endbr", "skip_frames_before_return" + "memory_barrier", "endbr", "skip_frames_before_return" }; static const char* op1_names[] = { @@ -1184,7 +1288,7 @@ static const char* fop2r_names[] = { }; static const char* simd_op2_names[] = { - "and", "or", "xor" + "and", "or", "xor", "shuffle" }; static const char* jump_names[] = { @@ -1224,12 +1328,13 @@ static const char* call_arg_names[] = { || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) #define SLJIT_SKIP_CHECKS(compiler) (compiler)->skip_checks = 1 +#define SLJIT_CHECK_OPCODE(op, flags) ((op) & ~(SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK | (flags))) static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) struct sljit_jump *jump; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ SLJIT_UNUSED_ARG(compiler); @@ -1241,7 +1346,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_com CHECK_ARGUMENT((jump->flags & JUMP_ADDR) || jump->u.label != NULL); jump = jump->next; } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ CHECK_RETURN_OK; } @@ -1252,9 +1357,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_com #endif /* !SLJIT_CONFIG_X86 */ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, 
sljit_s32 saveds, sljit_s32 local_size) { +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 real_scratches = ENTER_GET_REGS(scratches); + sljit_s32 real_saveds = ENTER_GET_REGS(saveds); + sljit_s32 real_fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 real_fsaveds = ENTER_GET_FLOAT_REGS(saveds); + sljit_s32 real_vscratches = ENTER_GET_VECTOR_REGS(scratches); + sljit_s32 real_vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#endif /* SLJIT_ARGUMENT_CHECKS */ SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1264,18 +1377,23 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0); } CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); - CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); - CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); - CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); - CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT((scratches & ~0xffffff) == 0 && (saveds & ~0xffffff) == 0); + CHECK_ARGUMENT(real_scratches >= 0 && real_scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(real_saveds >= 0 && real_saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); + CHECK_ARGUMENT(real_scratches + real_saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(real_fscratches >= 0 && real_fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(real_fsaveds >= 0 && real_fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); + CHECK_ARGUMENT(real_fscratches + real_fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(real_vscratches >= 0 && real_vscratches <= 
SLJIT_NUMBER_OF_VECTOR_REGISTERS); + CHECK_ARGUMENT(real_vsaveds >= 0 && real_vsaveds <= SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS); + CHECK_ARGUMENT(real_vscratches + real_vsaveds <= SLJIT_NUMBER_OF_VECTOR_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) <= SLJIT_ARG_TYPE_F32); - CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); + CHECK_ARGUMENT(function_check_arguments(arg_types, real_scratches, + (options & SLJIT_ENTER_REG_ARG) ? 0 : real_saveds, real_fscratches)); compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); @@ -1307,17 +1425,26 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil } #endif /* !SLJIT_CONFIG_X86 */ - fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", - scratches, saveds, fscratches, fsaveds, local_size); + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, vscratches:%d, vsaveds:%d, local_size:%d\n", + ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), + ENTER_GET_VECTOR_REGS(scratches), ENTER_GET_VECTOR_REGS(saveds), local_size); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 real_scratches = 
ENTER_GET_REGS(scratches); + sljit_s32 real_saveds = ENTER_GET_REGS(saveds); + sljit_s32 real_fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 real_fsaveds = ENTER_GET_FLOAT_REGS(saveds); + sljit_s32 real_vscratches = ENTER_GET_VECTOR_REGS(scratches); + sljit_s32 real_vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#endif /* SLJIT_ARGUMENT_CHECKS */ SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1327,18 +1454,23 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0); } CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); - CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); - CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); - CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); - CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT((scratches & ~0xffffff) == 0 && (saveds & ~0xffffff) == 0); + CHECK_ARGUMENT(real_scratches >= 0 && real_scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(real_saveds >= 0 && real_saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); + CHECK_ARGUMENT(real_scratches + real_saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(real_fscratches >= 0 && real_fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(real_fsaveds >= 0 && real_fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); + CHECK_ARGUMENT(real_fscratches + real_fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(real_vscratches >= 0 && real_vscratches <= SLJIT_NUMBER_OF_VECTOR_REGISTERS); + CHECK_ARGUMENT(real_vsaveds >= 0 && real_vsaveds <= SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS); + 
CHECK_ARGUMENT(real_vscratches + real_vsaveds <= SLJIT_NUMBER_OF_VECTOR_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); - CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); + CHECK_ARGUMENT(function_check_arguments(arg_types, real_scratches, + (options & SLJIT_ENTER_REG_ARG) ? 0 : real_saveds, real_fscratches)); compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " set_context ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); @@ -1370,10 +1502,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi } #endif /* !SLJIT_CONFIG_X86 */ - fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", - scratches, saveds, fscratches, fsaveds, local_size); + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, vscratches:%d, vsaveds:%d, local_size:%d\n", + ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), + ENTER_GET_VECTOR_REGS(scratches), ENTER_GET_VECTOR_REGS(saveds), local_size); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1388,13 +1521,13 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_RET_VOID); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " return_void\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1427,13 +1560,13 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi break; 
} - if (GET_OPCODE(op) < SLJIT_MOV_F64) { + if (SLJIT_CHECK_OPCODE(op, 0) < SLJIT_MOV_F64) { FUNCTION_CHECK_SRC(src, srcw); } else { FUNCTION_FCHECK(src, srcw, op & SLJIT_32); } compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (GET_OPCODE(op) < SLJIT_MOV_F64) { @@ -1446,7 +1579,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi } fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1455,14 +1588,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_to(struct sljit_co { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_SRC(src, srcw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " return_to "); sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1471,11 +1604,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) || ((op & ~SLJIT_32) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_32) <= SLJIT_DIV_SW) - || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); - CHECK_ARGUMENT(GET_OPCODE(op) < SLJIT_LMUL_UW || GET_OPCODE(op) >= SLJIT_ENDBR || compiler->scratches >= 2); - if ((GET_OPCODE(op) >= SLJIT_LMUL_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN) + || (op >= SLJIT_MEMORY_BARRIER && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) < SLJIT_LMUL_UW || SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_MEMORY_BARRIER || compiler->scratches >= 2); + if ((SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_LMUL_UW && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_DIV_SW) || op 
== SLJIT_SKIP_FRAMES_BEFORE_RETURN) compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1485,7 +1618,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler } fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1499,7 +1632,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV_S32); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_MOV && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_REV_S32); switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1520,7 +1653,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler FUNCTION_CHECK_DST(dst, dstw); FUNCTION_CHECK_SRC(src, srcw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], @@ -1531,7 +1664,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1546,26 +1679,37 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_load(struct sljit_ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P); - CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, SLJIT_ATOMIC_TEST | SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS | SLJIT_SET_Z | VARIABLE_FLAG_MASK) >= SLJIT_MOV + && 
SLJIT_CHECK_OPCODE(op, SLJIT_ATOMIC_TEST | SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS | SLJIT_SET_Z | VARIABLE_FLAG_MASK) <= SLJIT_MOV_P); + CHECK_ARGUMENT((op & (SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS)) != (SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS)); /* All arguments must be valid registers. */ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg)); - if (op == SLJIT_MOV32_U8 || op == SLJIT_MOV32_U16) { - /* Only SLJIT_32 is allowed. */ - CHECK_ARGUMENT(!(op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z))); - } else { + if (GET_OPCODE(op) < SLJIT_MOV_U8 || GET_OPCODE(op) > SLJIT_MOV_S16) { /* Nothing allowed. */ - CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + CHECK_ARGUMENT(!(op & SLJIT_32)); } compiler->last_flags = 0; #endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " atomic_load%s%s ", !(op & SLJIT_32) ? "" : "32", + if (op & SLJIT_ATOMIC_TEST) + CHECK_RETURN_OK; + if (sljit_emit_atomic_load(compiler, op | SLJIT_ATOMIC_TEST, dst_reg, mem_reg)) { + fprintf(compiler->verbose, " # atomic_load: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " atomic_load"); + if (op & SLJIT_ATOMIC_USE_CAS) + fprintf(compiler->verbose, "_cas"); + if (op & SLJIT_ATOMIC_USE_LS) + fprintf(compiler->verbose, "_ls"); + + fprintf(compiler->verbose, "%s%s ", !(op & SLJIT_32) ? 
"" : "32", op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]); sljit_verbose_reg(compiler, dst_reg); fprintf(compiler->verbose, ", ["); @@ -1588,29 +1732,40 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_store(struct sljit #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P); - CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, SLJIT_ATOMIC_TEST | SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS | SLJIT_SET_Z) >= SLJIT_MOV + && SLJIT_CHECK_OPCODE(op, SLJIT_ATOMIC_TEST | SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS | SLJIT_SET_Z) <= SLJIT_MOV_P); + CHECK_ARGUMENT((op & (SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS)) != (SLJIT_ATOMIC_USE_CAS | SLJIT_ATOMIC_USE_LS)); /* All arguments must be valid registers. */ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_reg)); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(temp_reg) && src_reg != temp_reg); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(temp_reg) && (src_reg != temp_reg || (op & SLJIT_ATOMIC_USE_LS))); - CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE(op) == SLJIT_ATOMIC_STORED); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE_MASK(op) == SLJIT_ATOMIC_STORED); - if (GET_OPCODE(op) == SLJIT_MOV_U8 || GET_OPCODE(op) == SLJIT_MOV_U16) { - /* Only SLJIT_32, SLJIT_ATOMIC_STORED are allowed. */ - CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); - } else { - /* Only SLJIT_ATOMIC_STORED is allowed. */ - CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z))); + if (GET_OPCODE(op) < SLJIT_MOV_U8 || GET_OPCODE(op) > SLJIT_MOV_S16) { + /* Nothing allowed. 
*/ + CHECK_ARGUMENT(!(op & SLJIT_32)); } - compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32); + compiler->last_flags = GET_FLAG_TYPE_MASK(op) | (op & SLJIT_32); #endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " atomic_store%s%s%s ", !(op & SLJIT_32) ? "" : "32", + if (op & SLJIT_ATOMIC_TEST) + CHECK_RETURN_OK; + if (sljit_emit_atomic_store(compiler, op | SLJIT_ATOMIC_TEST, src_reg, mem_reg, temp_reg)) { + fprintf(compiler->verbose, " # atomic_store: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " atomic_store"); + if (op & SLJIT_ATOMIC_USE_CAS) + fprintf(compiler->verbose, "_cas"); + if (op & SLJIT_ATOMIC_USE_LS) + fprintf(compiler->verbose, "_ls"); + + fprintf(compiler->verbose, "%s%s%s ", !(op & SLJIT_32) ? "" : "32", op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & VARIABLE_FLAG_MASK) ? "" : ".stored"); sljit_verbose_reg(compiler, src_reg); fprintf(compiler->verbose, ", ["); @@ -1634,7 +1789,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ROTR); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_ADD && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_ROTR); switch (GET_OPCODE(op)) { case SLJIT_AND: @@ -1687,7 +1842,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? 
"" : "32", @@ -1703,7 +1858,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1718,7 +1873,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2r(struct sljit_compile FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s ", op2r_names[GET_OPCODE(op) - SLJIT_OP2R_BASE], !(op & SLJIT_32) ? "" : "32"); @@ -1730,7 +1885,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2r(struct sljit_compile sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1741,15 +1896,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_c sljit_s32 src3, sljit_sw src3w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_LSHR - || GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) == SLJIT_SHL || SLJIT_CHECK_OPCODE(op, 0) == SLJIT_LSHR + || SLJIT_CHECK_OPCODE(op, 0) == SLJIT_MSHL || SLJIT_CHECK_OPCODE(op, 0) == SLJIT_MLSHR); CHECK_ARGUMENT((op & ~(0xff | SLJIT_32 | SLJIT_SHIFT_INTO_NON_ZERO)) == 0); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src1_reg)); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src2_reg)); FUNCTION_CHECK_SRC(src3, src3w); CHECK_ARGUMENT(dst_reg != src2_reg); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.into%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? 
"" : "32", @@ -1764,7 +1919,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_c sljit_verbose_param(compiler, src3, src3w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1781,14 +1936,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compi } else if (op >= SLJIT_PREFETCH_L1 && op <= SLJIT_PREFETCH_ONCE) { CHECK_ARGUMENT(src & SLJIT_MEM); } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s ", op_src_dst_names[op - SLJIT_OP_SRC_DST_BASE]); sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1801,14 +1956,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_dst(struct sljit_compi if (op == SLJIT_FAST_ENTER) compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s ", op_src_dst_names[op - SLJIT_OP_SRC_DST_BASE]); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1820,12 +1975,19 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 t if (type == SLJIT_GP_REGISTER) { CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS) || (reg >= SLJIT_TMP_REGISTER_BASE && reg < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS))); - } else { - CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))); + } +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + else if (((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))) { + CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_VECTOR_REGISTERS) + || (reg >= 
SLJIT_TMP_VREGISTER_BASE && reg < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS))); + } +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ + else { + CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6) || (type & (3 << 12)) || (type & (4 << 12)) || (type & (5 << 12)) || (type & (6 << 12)))); CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS) || (reg >= SLJIT_TMP_FREGISTER_BASE && reg < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))); } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ CHECK_RETURN_OK; } @@ -1834,7 +1996,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co { #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) sljit_u32 i; -#endif +#endif /* SLJIT_VERBOSE */ SLJIT_UNUSED_ARG(compiler); @@ -1848,12 +2010,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co || (size == 4 && (((sljit_sw)instruction) & 0x3) == 0)); #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) CHECK_ARGUMENT(size == 2 || size == 4 || size == 6); -#else +#else /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM_THUMB2 && !SLJIT_CONFIG_S390X */ CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0); -#endif +#endif /* SLJIT_CONFIG_X86 */ compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " op_custom"); @@ -1861,7 +2023,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co fprintf(compiler->verbose, " 0x%x", ((sljit_u8*)instruction)[i]); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1876,11 +2038,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - 
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_MOV_F64 && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_ABS_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src, srcw, op & SLJIT_32); FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) @@ -1895,7 +2057,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile sljit_verbose_fparam(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1905,7 +2067,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; @@ -1914,13 +2076,13 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) == SLJIT_CMP_F64); CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK) || (GET_FLAG_TYPE(op) >= SLJIT_F_EQUAL && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_LESS_EQUAL)); FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_32) ? 
".f32" : ".f64"); @@ -1933,7 +2095,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1951,7 +2113,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src, srcw, op & SLJIT_32); FUNCTION_CHECK_DST(dst, dstw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], @@ -1962,7 +2124,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str sljit_verbose_fparam(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -1980,7 +2142,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_w(stru CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_CHECK_SRC(src, srcw); FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from.%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], @@ -1991,7 +2153,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_w(stru sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2007,12 +2169,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) 
>= SLJIT_ADD_F64 && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_DIV_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_32) ? ".f32" : ".f64"); @@ -2023,7 +2185,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2034,11 +2196,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2r(struct sljit_compil { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_COPYSIGN_F64); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) == SLJIT_COPYSIGN_F64); FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, op & SLJIT_32)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s ", fop2r_names[GET_OPCODE(op) - SLJIT_FOP2R_BASE], (op & SLJIT_32) ? 
".f32" : ".f64"); @@ -2049,7 +2211,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2r(struct sljit_compil sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2066,14 +2228,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset32(struct sljit_compi #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 1)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fset32 "); sljit_verbose_freg(compiler, freg); fprintf(compiler->verbose, ", %f\n", value); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2090,14 +2252,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset64(struct sljit_compi #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fset64 "); sljit_verbose_freg(compiler, freg); fprintf(compiler->verbose, ", %f\n", value); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2106,7 +2268,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcopy(struct sljit_compil { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_COPY_TO_F64 && GET_OPCODE(op) <= SLJIT_COPY_FROM_F64); + CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_COPY_TO_F64 && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_COPY_FROM_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, op & SLJIT_32)); @@ -2135,7 +2297,7 @@ static 
SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcopy(struct sljit_compil break; } #endif /* SLJIT_64BIT_ARCHITECTURE */ -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " copy%s_%s_f%s ", (op & SLJIT_32) ? "32" : "", @@ -2155,7 +2317,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcopy(struct sljit_compil fprintf(compiler->verbose, "\n"); } } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2170,12 +2332,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) fprintf(compiler->verbose, "label:\n"); -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2185,9 +2347,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil #define CHECK_UNORDERED(type, last_flags) \ ((((type) & 0xfe) == SLJIT_ORDERED) && \ ((last_flags) & 0xff) >= SLJIT_UNORDERED && ((last_flags) & 0xff) <= SLJIT_ORDERED_LESS_EQUAL) -#else +#else /* !SLJIT_CONFIG_X86 || SLJIT_CONFIG_ARM */ #define CHECK_UNORDERED(type, last_flags) 0 -#endif +#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_ARM */ #endif /* SLJIT_ARGUMENT_CHECKS */ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) @@ -2211,12 +2373,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile CHECK_ARGUMENT((type & 0xfe) == (compiler->last_flags & 0xff) || CHECK_UNORDERED(type, compiler->last_flags)); } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) fprintf(compiler->verbose, " jump%s %s\n", !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", jump_names[type & 0xff]); -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2237,7 +2399,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compile CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); } } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s%s ret[%s", jump_names[type & 0xff], @@ -2257,7 +2419,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compile } fprintf(compiler->verbose, "]\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2271,7 +2433,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " cmp%s%s %s, ", (type & SLJIT_32) ? "32" : "", @@ -2281,7 +2443,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2296,7 +2458,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile FUNCTION_FCHECK(src1, src1w, type & SLJIT_32); FUNCTION_FCHECK(src2, src2w, type & SLJIT_32); compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fcmp%s%s %s, ", (type & SLJIT_32) ? 
".f32" : ".f64", @@ -2306,7 +2468,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2321,14 +2483,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compil #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_FAST_CALL); FUNCTION_CHECK_SRC(src, srcw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2351,7 +2513,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compil CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); } } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff], @@ -2372,7 +2534,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compil sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2383,7 +2545,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 - || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); + || (SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_AND && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_XOR)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); if (type <= SLJIT_NOT_ZERO) @@ -2396,7 +2558,7 @@ static SLJIT_INLINE 
CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com if (GET_OPCODE(op) >= SLJIT_ADD) compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " flags.%s%s%s ", @@ -2406,7 +2568,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", %s\n", jump_names[type]); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2433,7 +2595,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_select(struct sljit_compi } else CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff) || CHECK_UNORDERED(cond, compiler->last_flags)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " select%s %s, ", @@ -2446,7 +2608,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_select(struct sljit_compi sljit_verbose_reg(compiler, src2_reg); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2473,7 +2635,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fselect(struct sljit_comp } else CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff) || CHECK_UNORDERED(cond, compiler->last_flags)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fselect%s %s, ", @@ -2486,7 +2648,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fselect(struct sljit_comp sljit_verbose_freg(compiler, src2_freg); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2540,7 +2702,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler } 
FUNCTION_CHECK_SRC_MEM(mem, memw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if ((type & 0xff) == SLJIT_MOV32) @@ -2572,7 +2734,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler sljit_verbose_param(compiler, mem, memw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2591,7 +2753,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem_update(struct sljit_c CHECK_ARGUMENT((mem & REG_MASK) != 0 && (mem & REG_MASK) != reg); FUNCTION_CHECK_SRC_MEM(mem, memw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_MEM_SUPP) @@ -2617,7 +2779,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem_update(struct sljit_c sljit_verbose_param(compiler, mem, memw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2641,7 +2803,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32)); FUNCTION_CHECK_SRC_MEM(mem, memw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s.%s", @@ -2661,7 +2823,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile sljit_verbose_param(compiler, mem, memw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2675,7 +2837,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_ CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0); 
FUNCTION_CHECK_SRC_MEM(mem, memw); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32)); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_MEM_SUPP) @@ -2695,12 +2857,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_ sljit_verbose_param(compiler, mem, memw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2709,14 +2871,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_com CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= (srcdst & SLJIT_MEM) ? 
SLJIT_SIMD_GET_REG_SIZE(type) : 0); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); - FUNCTION_FCHECK(srcdst, srcdstw, 0); -#endif + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, type)); + FUNCTION_VCHECK(srcdst, srcdstw, type); +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_SIMD_TEST) CHECK_RETURN_OK; - if (sljit_emit_simd_mov(compiler, type | SLJIT_SIMD_TEST, freg, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { + if (sljit_emit_simd_mov(compiler, type | SLJIT_SIMD_TEST, vreg, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { fprintf(compiler->verbose, " # simd_mem: unsupported form, no instructions are emitted\n"); CHECK_RETURN_OK; } @@ -2732,17 +2894,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_com else fprintf(compiler->verbose, ".al%d ", (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type))); - sljit_verbose_freg(compiler, freg); + sljit_verbose_vreg(compiler, vreg); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(compiler, srcdst, srcdstw); + sljit_verbose_vparam(compiler, srcdst, srcdstw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2750,7 +2912,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct slj CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, type)); if (type & SLJIT_SIMD_FLOAT) { if (src == SLJIT_IMM) { @@ -2761,12 +2923,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE 
check_sljit_emit_simd_replicate(struct slj } else if (src != SLJIT_IMM) { FUNCTION_CHECK_DST(src, srcw); } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_SIMD_TEST) CHECK_RETURN_OK; - if (sljit_emit_simd_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { + if (sljit_emit_simd_replicate(compiler, type | SLJIT_SIMD_TEST, vreg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { fprintf(compiler->verbose, " # simd_dup: unsupported form, no instructions are emitted\n"); CHECK_RETURN_OK; } @@ -2776,7 +2938,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct slj (type & SLJIT_SIMD_FLOAT) ? "f" : "", (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); - sljit_verbose_freg(compiler, freg); + sljit_verbose_vreg(compiler, vreg); fprintf(compiler->verbose, ", "); if (type & SLJIT_SIMD_FLOAT) sljit_verbose_fparam(compiler, src, srcw); @@ -2784,12 +2946,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct slj sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2801,7 +2963,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct slji CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); CHECK_ARGUMENT(!(type & SLJIT_32) || SLJIT_SIMD_GET_ELEM_SIZE(type) <= 2); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, type)); CHECK_ARGUMENT(lane_index >= 0 && lane_index < (1 << 
(SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); if (type & SLJIT_SIMD_FLOAT) { @@ -2809,12 +2971,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct slji } else if ((type & SLJIT_SIMD_STORE) || srcdst != SLJIT_IMM) { FUNCTION_CHECK_DST(srcdst, srcdstw); } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_SIMD_TEST) CHECK_RETURN_OK; - if (sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_TEST, freg, lane_index, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { + if (sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_TEST, vreg, lane_index, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { fprintf(compiler->verbose, " # simd_move_lane: unsupported form, no instructions are emitted\n"); CHECK_RETURN_OK; } @@ -2828,7 +2990,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct slji (type & SLJIT_SIMD_FLOAT) ? "f" : "", (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); - sljit_verbose_freg(compiler, freg); + sljit_verbose_vreg(compiler, vreg); fprintf(compiler->verbose, "[%d], ", lane_index); if (type & SLJIT_SIMD_FLOAT) sljit_verbose_fparam(compiler, srcdst, srcdstw); @@ -2836,12 +2998,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct slji sljit_verbose_param(compiler, srcdst, srcdstw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2849,15 +3011,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struc CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) 
< SLJIT_SIMD_GET_REG_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(src, type)); CHECK_ARGUMENT(src_lane_index >= 0 && src_lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_SIMD_TEST) CHECK_RETURN_OK; - if (sljit_emit_simd_lane_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, src_lane_index) == SLJIT_ERR_UNSUPPORTED) { + if (sljit_emit_simd_lane_replicate(compiler, type | SLJIT_SIMD_TEST, vreg, src, src_lane_index) == SLJIT_ERR_UNSUPPORTED) { fprintf(compiler->verbose, " # simd_lane_replicate: unsupported form, no instructions are emitted\n"); CHECK_RETURN_OK; } @@ -2867,17 +3029,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struc (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); - sljit_verbose_freg(compiler, freg); + sljit_verbose_vreg(compiler, vreg); fprintf(compiler->verbose, ", "); - sljit_verbose_freg(compiler, src); + sljit_verbose_vreg(compiler, src); fprintf(compiler->verbose, "[%d]\n", src_lane_index); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2887,14 +3049,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_ELEM2_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); - FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); -#endif + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, type)); + FUNCTION_VCHECK(src, srcw, type); +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_SIMD_TEST) CHECK_RETURN_OK; - if (sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { + if (sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_TEST, vreg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { fprintf(compiler->verbose, " # simd_extend: unsupported form, no instructions are emitted\n"); CHECK_RETURN_OK; } @@ -2907,17 +3069,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_ (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); - sljit_verbose_freg(compiler, freg); + sljit_verbose_vreg(compiler, vreg); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(compiler, src, srcw); + sljit_verbose_vparam(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2925,14 +3087,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_co CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_32)) == SLJIT_SIMD_STORE); CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, type)); FUNCTION_CHECK_DST(dst, dstw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_SIMD_TEST) CHECK_RETURN_OK; - if (sljit_emit_simd_sign(compiler, type | SLJIT_SIMD_TEST, freg, dst, dstw) == SLJIT_ERR_UNSUPPORTED) { + if (sljit_emit_simd_sign(compiler, type | SLJIT_SIMD_TEST, vreg, dst, dstw) == SLJIT_ERR_UNSUPPORTED) { fprintf(compiler->verbose, " # simd_sign: unsupported form, no instructions are emitted\n"); CHECK_RETURN_OK; } @@ -2943,50 +3105,56 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_co (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); - sljit_verbose_freg(compiler, freg); + sljit_verbose_vreg(compiler, vreg); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); - CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) >= SLJIT_SIMD_OP2_AND && (type & SLJIT_SIMD_TYPE_MASK(0)) <= SLJIT_SIMD_OP2_XOR); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK2(0)) >= SLJIT_SIMD_OP2_AND && (type & SLJIT_SIMD_TYPE_MASK2(0)) <= SLJIT_SIMD_OP2_SHUFFLE); CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, 0)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src1_freg, 0)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, 0)); -#endif + CHECK_ARGUMENT(SLJIT_SIMD_GET_OPCODE(type) != SLJIT_SIMD_OP2_SHUFFLE || (SLJIT_SIMD_GET_ELEM_SIZE(type) == 0 && !(type & SLJIT_SIMD_FLOAT))); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= ((src2 & SLJIT_MEM) ?
SLJIT_SIMD_GET_REG_SIZE(type) : 0)); /* the conditional is parenthesized because <= binds tighter than ?: - the alignment field is bounded by the register size for a memory src2 and by 0 otherwise */ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(dst_vreg, type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(src1_vreg, type)); + FUNCTION_VCHECK(src2, src2w, type); +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (type & SLJIT_SIMD_TEST) CHECK_RETURN_OK; - if (sljit_emit_simd_op2(compiler, type | SLJIT_SIMD_TEST, dst_freg, src1_freg, src2_freg) == SLJIT_ERR_UNSUPPORTED) { + if (sljit_emit_simd_op2(compiler, type | SLJIT_SIMD_TEST, dst_vreg, src1_vreg, src2, src2w) == SLJIT_ERR_UNSUPPORTED) { fprintf(compiler->verbose, " # simd_op2: unsupported form, no instructions are emitted\n"); CHECK_RETURN_OK; } - fprintf(compiler->verbose, " simd_%s.%d.%s%d ", + fprintf(compiler->verbose, " simd_%s.%d.%s%d", simd_op2_names[SLJIT_SIMD_GET_OPCODE(type) - 1], (8 << SLJIT_SIMD_GET_REG_SIZE(type)), (type & SLJIT_SIMD_FLOAT) ? "f" : "", (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); - sljit_verbose_freg(compiler, dst_freg); + if ((type & 0x3f000000) != SLJIT_SIMD_MEM_UNALIGNED) + fprintf(compiler->verbose, ".al%d", (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type))); + + fprintf(compiler->verbose, " "); + sljit_verbose_vreg(compiler, dst_vreg); fprintf(compiler->verbose, ", "); - sljit_verbose_freg(compiler, src1_freg); + sljit_verbose_vreg(compiler, src1_vreg); fprintf(compiler->verbose, ", "); - sljit_verbose_freg(compiler, src2_freg); + sljit_verbose_vparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -2997,14 +3165,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_co #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " local_base "); sljit_verbose_param(compiler, dst, dstw);
fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -3014,14 +3182,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " const "); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -3029,14 +3197,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mov_addr(struct sljit_com { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " mov_addr "); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } -#endif +#endif /* SLJIT_VERBOSE */ CHECK_RETURN_OK; } @@ -3114,23 +3282,23 @@ static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, slji #define SLJIT_CPUINFO_PART1 " 32bit (" #elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #define SLJIT_CPUINFO_PART1 " 64bit (" -#else +#else /* !SLJIT_32BIT_ARCHITECTURE && !SLJIT_64BIT_ARCHITECTURE */ #error "Internal error: CPU type info missing" -#endif +#endif /* SLJIT_32BIT_ARCHITECTURE */ #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) #define SLJIT_CPUINFO_PART2 "little endian + " #elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) #define SLJIT_CPUINFO_PART2 "big endian + " -#else +#else /* !SLJIT_LITTLE_ENDIAN && !SLJIT_BIG_ENDIAN */ #error "Internal error: CPU type info missing" -#endif +#endif /* SLJIT_LITTLE_ENDIAN */ #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) #define 
SLJIT_CPUINFO_PART3 "unaligned)" -#else +#else /* !SLJIT_UNALIGNED */ #define SLJIT_CPUINFO_PART3 "aligned)" -#endif +#endif /* SLJIT_UNALIGNED */ #define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 @@ -3154,7 +3322,7 @@ static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, slji # include "sljitNativeS390X.c" #elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) # include "sljitNativeLOONGARCH_64.c" -#endif +#endif /* SLJIT_CONFIG_X86 */ #include "sljitSerialize.c" @@ -3164,10 +3332,10 @@ static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *comp /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. */ if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) return SLJIT_SUCCESS; -#else +#else /* !SLJIT_64BIT_ARCHITECTURE */ if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P)) return SLJIT_SUCCESS; -#endif +#endif /* SLJIT_64BIT_ARCHITECTURE */ SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); @@ -3249,7 +3417,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler if (src2 == SLJIT_IMM && !src2w) return emit_cmp_to0(compiler, type, src1, src1w); } -#endif +#endif /* SLJIT_CONFIG_ARM_64 */ if (SLJIT_UNLIKELY(src1 == SLJIT_IMM && src2 != SLJIT_IMM)) { /* Immediate is preferred as second argument by most architectures. 
*/ @@ -3389,17 +3557,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ && !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ && !(defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { CHECK_ERROR(); - CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(vreg); SLJIT_UNUSED_ARG(srcdst); SLJIT_UNUSED_ARG(srcdstw); @@ -3407,14 +3576,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); - CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(vreg); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); @@ -3422,14 +3591,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, 
lane_index, srcdst, srcdstw)); SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(vreg); SLJIT_UNUSED_ARG(lane_index); SLJIT_UNUSED_ARG(srcdst); SLJIT_UNUSED_ARG(srcdstw); @@ -3438,14 +3607,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(vreg); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(src_lane_index); @@ -3453,14 +3622,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); - CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(vreg); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); @@ -3468,14 +3637,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { CHECK_ERROR(); - CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); + 
SLJIT_UNUSED_ARG(vreg); SLJIT_UNUSED_ARG(dst); SLJIT_UNUSED_ARG(dstw); @@ -3483,56 +3652,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(dst_freg); - SLJIT_UNUSED_ARG(src1_freg); - SLJIT_UNUSED_ARG(src2_freg); - - return SLJIT_ERR_UNSUPPORTED; -} - -#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM */ - -#if !(defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86) \ - && !(defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \ - && !(defined(SLJIT_CONFIG_S390X) && SLJIT_CONFIG_S390X) \ - && !(defined(SLJIT_CONFIG_LOONGARCH) && SLJIT_CONFIG_LOONGARCH) - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, - sljit_s32 op, - sljit_s32 dst_reg, - sljit_s32 mem_reg) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst_reg); - SLJIT_UNUSED_ARG(mem_reg); - - CHECK_ERROR(); - CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); - - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, - sljit_s32 op, - sljit_s32 src_reg, - sljit_s32 mem_reg, - sljit_s32 temp_reg) -{ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(src_reg); - SLJIT_UNUSED_ARG(mem_reg); - SLJIT_UNUSED_ARG(temp_reg); - - CHECK_ERROR(); - CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + SLJIT_UNUSED_ARG(dst_vreg); + SLJIT_UNUSED_ARG(src1_vreg); + SLJIT_UNUSED_ARG(src2); + 
SLJIT_UNUSED_ARG(src2w); return SLJIT_ERR_UNSUPPORTED; } diff --git a/src/sljit/sljitLir.h b/deps/sljit/sljit_src/sljitLir.h similarity index 91% rename from src/sljit/sljitLir.h rename to deps/sljit/sljit_src/sljitLir.h index 8b6fa69..60d34f1 100644 --- a/src/sljit/sljitLir.h +++ b/deps/sljit/sljit_src/sljitLir.h @@ -87,7 +87,7 @@ of sljitConfigInternal.h */ #ifdef __cplusplus extern "C" { -#endif +#endif /* __cplusplus */ /* Version numbers. */ #define SLJIT_MAJOR_VERSION 0 @@ -251,7 +251,7 @@ extern "C" { #define SLJIT_FS7 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 7) #define SLJIT_FS8 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 8) #define SLJIT_FS9 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 9) -/* All S registers provided by the architecture can be accessed by SLJIT_FS(i) +/* All FS registers provided by the architecture can be accessed by SLJIT_FS(i) The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ #define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) @@ -262,6 +262,52 @@ extern "C" { #define SLJIT_RETURN_FREG SLJIT_FR0 +/* --------------------------------------------------------------------- */ +/* Vector registers */ +/* --------------------------------------------------------------------- */ + +/* Vector registers are storage areas, which are used for Single Instruction + Multiple Data (SIMD) computations. The VR and VS register sets overlap + in the same way as R and S register sets. See above. + + The storage space of vector registers often overlaps with floating point + registers. In this case setting the value of SLJIT_VR(i) destroys the + value of SLJIT_FR(i) and vice versa. See SLJIT_SEPARATE_VECTOR_REGISTERS + macro. */ + +/* Vector scratch registers. 
*/ +#define SLJIT_VR0 1 +#define SLJIT_VR1 2 +#define SLJIT_VR2 3 +#define SLJIT_VR3 4 +#define SLJIT_VR4 5 +#define SLJIT_VR5 6 +#define SLJIT_VR6 7 +#define SLJIT_VR7 8 +#define SLJIT_VR8 9 +#define SLJIT_VR9 10 +/* All VR registers provided by the architecture can be accessed by SLJIT_VR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_VECTOR_REGISTERS. */ +#define SLJIT_VR(i) (1 + (i)) + +/* Vector saved registers. */ +#define SLJIT_VS0 (SLJIT_NUMBER_OF_VECTOR_REGISTERS) +#define SLJIT_VS1 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 1) +#define SLJIT_VS2 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 2) +#define SLJIT_VS3 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 3) +#define SLJIT_VS4 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 4) +#define SLJIT_VS5 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 5) +#define SLJIT_VS6 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 6) +#define SLJIT_VS7 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 7) +#define SLJIT_VS8 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 8) +#define SLJIT_VS9 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 9) +/* All VS registers provided by the architecture can be accessed by SLJIT_VS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS. */ +#define SLJIT_VS(i) (SLJIT_NUMBER_OF_VECTOR_REGISTERS - (i)) + +/* Vector registers >= SLJIT_FIRST_SAVED_VECTOR_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_VECTOR_REG (SLJIT_VS0 - SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS + 1) + /* --------------------------------------------------------------------- */ /* Argument type definitions */ /* --------------------------------------------------------------------- */ @@ -483,6 +529,15 @@ struct sljit_compiler { sljit_s32 fscratches; /* Available float saved registers. */ sljit_s32 fsaveds; +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + /* Available vector scratch registers. 
*/ + sljit_s32 vscratches; + /* Available vector saved registers. */ + sljit_s32 vsaveds; +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS || SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */ /* Local stack size. */ sljit_s32 local_size; /* Maximum code size. */ @@ -563,6 +618,7 @@ struct sljit_compiler { FILE* verbose; #endif /* SLJIT_VERBOSE */ + /* Note: SLJIT_DEBUG enables SLJIT_ARGUMENT_CHECKS. */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) /* Flags specified by the last arithmetic instruction. @@ -577,6 +633,13 @@ struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +#if !(defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + /* Available float scratch registers. */ + sljit_s32 real_fscratches; + /* Available float saved registers. */ + sljit_s32 real_fsaveds; +#endif /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ + /* Trust arguments when an API function is called. Used internally for calling API functions. */ sljit_s32 skip_checks; @@ -634,7 +697,7 @@ static SLJIT_INLINE void* sljit_compiler_get_user_data(struct sljit_compiler *co #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) /* Passing NULL disables verbose. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); -#endif +#endif /* SLJIT_VERBOSE */ /* Option bits for sljit_generate_code. */ @@ -680,7 +743,9 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler support while others (e.g. move with update) are emulated if not available. However, even when a feature is emulated, specialized code paths may be faster than the emulation. Some limitations are emulated as well so their - general case is supported but it has extra performance costs. */ + general case is supported but it has extra performance costs. 
+ + Note: sljitConfigInternal.h also provides several feature detection macros. */ /* [Not emulated] Floating-point support is available. */ #define SLJIT_HAS_FPU 0 @@ -715,20 +780,22 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler a simd operation represents the same 128 bit register, and both SLJIT_FR0 and SLJIT_FR1 are overwritten. */ #define SLJIT_SIMD_REGS_ARE_PAIRS 13 -/* [Not emulated] Atomic support is available (fine-grained). */ -#define SLJIT_HAS_ATOMIC 14 +/* [Not emulated] Atomic support is available. */ +#define SLJIT_HAS_ATOMIC 14 +/* [Not emulated] Memory barrier support is available. */ +#define SLJIT_HAS_MEMORY_BARRIER 15 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) /* [Not emulated] AVX support is available on x86. */ #define SLJIT_HAS_AVX 100 /* [Not emulated] AVX2 support is available on x86. */ #define SLJIT_HAS_AVX2 101 -#endif +#endif /* SLJIT_CONFIG_X86 */ #if (defined SLJIT_CONFIG_LOONGARCH) /* [Not emulated] LASX support is available on LoongArch */ #define SLJIT_HAS_LASX 201 -#endif +#endif /* SLJIT_CONFIG_LOONGARCH */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); @@ -749,42 +816,65 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); with an error code. */ /* - The executable code is a function from the viewpoint of the C - language. The function calls must conform to the ABI (Application - Binary Interface) of the platform, which specify the purpose of - machine registers and stack handling among other things. The - sljit_emit_enter function emits the necessary instructions for - setting up a new context for the executable code. This is often - called as function prologue. Furthermore the options argument - can be used to pass configuration options to the compiler. The + The executable code is a callable function from the viewpoint + of the C language. 
 Function calls must conform with the ABI + (Application Binary Interface) of the target platform, which + specifies the purpose of machine registers and stack handling + among other things. The sljit_emit_enter function emits the + necessary instructions for setting up an entry point for the + executable code. This is often called the function prologue. + + The "options" argument can be used to pass configuration options + to the sljit compiler which affect the generated code, until + another sljit_emit_enter or sljit_set_context is called. The available options are listed before sljit_emit_enter. The function argument list is specified by the SLJIT_ARGSx (SLJIT_ARGS0 .. SLJIT_ARGS4) macros. Currently maximum four arguments are supported. See the description of SLJIT_ARGSx - macros about argument passing. Furthermore the register set - used by the function must be declared as well. The number of - scratch and saved registers available to the function must - be passed to sljit_emit_enter. Only R registers between R0 - and "scratches" argument can be used later. E.g. if "scratches" - is set to two, the scratch register set will be limited to - SLJIT_R0 and SLJIT_R1. The S registers and the floating point - registers ("fscratches" and "fsaveds") are specified in a - similar manner. The sljit_emit_enter is also capable of - allocating a stack space for local data. The "local_size" - argument contains the size in bytes of this local area, and - it can be accessed using SLJIT_MEM1(SLJIT_SP). The memory - area between SLJIT_SP (inclusive) and SLJIT_SP + local_size - (exclusive) can be modified freely until the function returns. - The stack space is not initialized to zero. + macros about argument passing. + + The register set used by the function must be declared as well. + The number of scratch and saved registers available to the + function must be passed to sljit_emit_enter. Only R registers + between R0 and "scratches" argument can be used later. E.g. 
+ if "scratches" is set to two, the scratch register set will + be limited to SLJIT_R0 and SLJIT_R1. The S registers are + declared in a similar manner, but their count is specified + by the "saveds" argument. The floating point scratch and saved + registers can be set by using the "scratches" and "saveds" arguments + as well, but their value must be passed to the SLJIT_ENTER_FLOAT + macro, see below. + + The sljit_emit_enter is also capable of allocating a stack + space for local data. The "local_size" argument contains the + size in bytes of this local area, and it can be accessed using + SLJIT_MEM1(SLJIT_SP). The memory area between SLJIT_SP (inclusive) + and SLJIT_SP + local_size (exclusive) can be modified freely + until the function returns. The allocated stack space is an + uninitialized memory area. + + Floating point scratch and saved registers must be specified + by the SLJIT_ENTER_FLOAT macro, whose result value should be + combined with the scratches / saveds argument. + + Examples: + To use three scratch and four floating point scratch + registers, the "scratches" argument must be set to: + 3 | SLJIT_ENTER_FLOAT(4) + + To use six saved and five floating point saved + registers, the "saveds" argument must be set to: + 6 | SLJIT_ENTER_FLOAT(5) Note: the following conditions must met: 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS 0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS - 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS - 0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS - fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + + 0 <= float scratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= float saveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + float scratches + float saveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS Note: the compiler can use saved registers as scratch registers, but the opposite is not supported @@ -793,6 +883,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); overwrites the 
previous context. */ +/* The following options are available for sljit_emit_enter. */ + /* Saved registers between SLJIT_S0 and SLJIT_S(n - 1) (inclusive) are not saved / restored on function enter / return. Instead, these registers can be used to pass / return data (such as @@ -808,17 +900,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); and all arguments must be stored in scratch registers. */ #define SLJIT_ENTER_REG_ARG 0x00000004 -/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ -#define SLJIT_MAX_LOCAL_SIZE 1048576 - #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) /* Use VEX prefix for all SIMD operations on x86. */ #define SLJIT_ENTER_USE_VEX 0x00010000 #endif /* !SLJIT_CONFIG_X86 */ +/* Macros for other sljit_emit_enter arguments. */ + +/* Floating point scratch and saved registers can be + specified by SLJIT_ENTER_FLOAT. */ +#define SLJIT_ENTER_FLOAT(regs) ((regs) << 8) + +/* Vector scratch and saved registers can be specified + by SLJIT_ENTER_VECTOR. */ +#define SLJIT_ENTER_VECTOR(regs) ((regs) << 16) + +/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ +#define SLJIT_MAX_LOCAL_SIZE 1048576 + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size); /* The SLJIT compiler has a current context (which contains the local stack space size, number of used registers, etc.) which is initialized @@ -834,8 +936,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi the previous context. 
*/ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size); /* Return to the caller function. The sljit_emit_return_void function does not return with any value. The sljit_emit_return function returns @@ -1092,16 +1194,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c the behaviour is undefined. */ #define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) #define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32) +/* Flags: - (does not modify flags) + May return with SLJIT_ERR_UNSUPPORTED if SLJIT_HAS_MEMORY_BARRIER + feature is not supported (calling sljit_has_cpu_feature() with + this feature option returns with 0). */ +#define SLJIT_MEMORY_BARRIER (SLJIT_OP0_BASE + 8) /* Flags: - (does not modify flags) ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64 when Intel Control-flow Enforcement Technology (CET) is enabled. No instructions are emitted for other architectures. */ -#define SLJIT_ENDBR (SLJIT_OP0_BASE + 8) +#define SLJIT_ENDBR (SLJIT_OP0_BASE + 9) /* Flags: - (may destroy flags) Skip stack frames before return when Intel Control-flow Enforcement Technology (CET) is enabled. No instructions are emitted for other architectures. */ -#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 9) +#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 10) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op); @@ -1890,21 +1997,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler /* The following options are used by several simd operations. 
*/ -/* Load data into a simd register, this is the default */ +/* Load data into a vector register, this is the default */ #define SLJIT_SIMD_LOAD 0x000000 -/* Store data from a simd register */ +/* Store data from a vector register */ #define SLJIT_SIMD_STORE 0x000001 -/* The simd register contains floating point values */ +/* The vector register contains floating point values */ #define SLJIT_SIMD_FLOAT 0x000400 /* Tests whether the operation is available */ #define SLJIT_SIMD_TEST 0x000800 -/* Move data to/from a 64 bit (8 byte) long SIMD register */ +/* Move data to/from a 64 bit (8 byte) long vector register */ #define SLJIT_SIMD_REG_64 (3 << 12) -/* Move data to/from a 128 bit (16 byte) long SIMD register */ +/* Move data to/from a 128 bit (16 byte) long vector register */ #define SLJIT_SIMD_REG_128 (4 << 12) -/* Move data to/from a 256 bit (32 byte) long SIMD register */ +/* Move data to/from a 256 bit (32 byte) long vector register */ #define SLJIT_SIMD_REG_256 (5 << 12) -/* Move data to/from a 512 bit (64 byte) long SIMD register */ +/* Move data to/from a 512 bit (64 byte) long vector register */ #define SLJIT_SIMD_REG_512 (6 << 12) /* Element size is 8 bit long (this is the default), usually cannot be combined with SLJIT_SIMD_FLOAT */ #define SLJIT_SIMD_ELEM_8 (0 << 18) @@ -1919,7 +2026,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler /* Element size is 256 bit long */ #define SLJIT_SIMD_ELEM_256 (5 << 18) -/* The following options are used by sljit_emit_simd_mov(). */ +/* The following options are used by sljit_emit_simd_mov() + and sljit_emit_simd_op2(). */ /* Memory address is unaligned (this is the default) */ #define SLJIT_SIMD_MEM_UNALIGNED (0 << 24) @@ -1936,7 +2044,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler /* Memory address is 512 bit aligned */ #define SLJIT_SIMD_MEM_ALIGNED_512 (6 << 24) -/* Moves data between a simd register and memory. 
+/* Moves data between a vector register and memory. If the operation is not supported, it returns with SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, @@ -1944,21 +2052,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler type must be a combination of SLJIT_SIMD_* and SLJIT_SIMD_MEM_* options - freg is the source or destination simd register + vreg is the source or destination vector register of the operation - srcdst must be a memory operand or a simd register + srcdst must be a memory operand or a vector register Note: The alignment and element size must be - less or equal than simd register size. + less or equal than vector register size. Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw); -/* Replicates a scalar value to all lanes of a simd +/* Replicates a scalar value to all lanes of a vector register. If the operation is not supported, it returns with @@ -1967,7 +2075,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co type must be a combination of SLJIT_SIMD_* options except SLJIT_SIMD_STORE. - freg is the destination simd register of the operation + vreg is the destination vector register of the operation src is the value which is replicated Note: @@ -1977,7 +2085,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw); /* The following options are used by sljit_emit_simd_lane_mov(). */ @@ -1987,7 +2095,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil /* Sign extend the integer value stored from the lane. 
*/ #define SLJIT_SIMD_LANE_SIGNED 0x000004 -/* Moves data between a simd register lane and a register or +/* Moves data between a vector register lane and a register or memory. If the srcdst argument is a register, it must be a floating point register when SLJIT_SIMD_FLOAT is specified, or a general purpose register otherwise. @@ -2003,7 +2111,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil is set and SLJIT_SIMD_FLOAT is not set SLJIT_SIMD_LANE_ZERO - when SLJIT_SIMD_LOAD is specified - freg is the source or destination simd register + vreg is the source or destination vector register of the operation lane_index is the index of the lane srcdst is the destination operand for loads, and @@ -2015,11 +2123,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw); /* Replicates a scalar value from a lane to all lanes - of a simd register. + of a vector register. If the operation is not supported, it returns with SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, @@ -2027,14 +2135,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile type must be a combination of SLJIT_SIMD_* options except SLJIT_SIMD_STORE. 
- freg is the destination simd register of the operation - src is the simd register which lane is replicated + vreg is the destination vector register of the operation + src is the vector register which lane is replicated src_lane_index is the lane index of the src register Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index); /* The following options are used by sljit_emit_simd_load_extend(). */ @@ -2048,7 +2156,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c /* Extend data to 64 bit */ #define SLJIT_SIMD_EXTEND_64 (3 << 24) -/* Extend elements and stores them in a simd register. +/* Extend elements and stores them in a vector register. The extension operation increases the size of the elements (e.g. from 16 bit to 64 bit). For integer values, the extension can be signed or unsigned. @@ -2059,15 +2167,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c type must be a combination of SLJIT_SIMD_*, and SLJIT_SIMD_EXTEND_* options except SLJIT_SIMD_STORE - freg is the destination simd register of the operation - src must be a memory operand or a simd register. + vreg is the destination vector register of the operation + src must be a memory operand or a vector register. In the latter case, the source elements are stored in the lower half of the register. 
Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw); /* Extract the highest bit (usually the sign bit) from @@ -2079,16 +2187,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler type must be a combination of SLJIT_SIMD_* and SLJIT_32 options except SLJIT_SIMD_LOAD - freg is the source simd register of the operation + vreg is the source vector register of the operation dst is the destination operand Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw); -/* The following options are used by sljit_emit_simd_op2(). */ +/* The following operations are used by sljit_emit_simd_op2(). */ /* Binary 'and' operation */ #define SLJIT_SIMD_OP2_AND 0x000001 @@ -2096,23 +2204,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c #define SLJIT_SIMD_OP2_OR 0x000002 /* Binary 'xor' operation */ #define SLJIT_SIMD_OP2_XOR 0x000003 +/* Shuffle bytes of src1 using the indicies in src2 */ +#define SLJIT_SIMD_OP2_SHUFFLE 0x000004 -/* Perform simd operations using simd registers. +/* Perform simd operations using vector registers. If the operation is not supported, it returns with SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, it does not emit any instructions. 
- type must be a combination of SLJIT_SIMD_* and SLJIT_SIMD_OP2_ - options except SLJIT_SIMD_LOAD and SLJIT_SIMD_STORE - dst_freg is the destination register of the operation - src1_freg is the first source register of the operation - src1_freg is the second source register of the operation + type must be a combination of SLJIT_SIMD_*, SLJIT_SIMD_MEM_* + and SLJIT_SIMD_OP2_* options except SLJIT_SIMD_LOAD + and SLJIT_SIMD_STORE + dst_vreg is the destination register of the operation + src1_vreg is the first source register of the operation + src2 is the second source operand of the operation Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg); + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w); + +/* The following operations are used by sljit_emit_atomic_load() and + sljit_emit_atomic_store() operations. */ + +/* Tests whether the atomic operation is available (does not generate + any instructions). When a load from is allowed, its corresponding + store form is allowed and vice versa. */ +#define SLJIT_ATOMIC_TEST 0x10000 +/* The compiler must generate compare and swap instruction. + When this bit is set, calling sljit_emit_atomic_load() is optional. */ +#define SLJIT_ATOMIC_USE_CAS 0x20000 +/* The compiler must generate load-acquire and store-release instructions. + When this bit is set, the temp_reg for sljit_emit_atomic_store is not used. */ +#define SLJIT_ATOMIC_USE_LS 0x40000 /* The sljit_emit_atomic_load and sljit_emit_atomic_store operation pair can perform an atomic read-modify-write operation. First, an unsigned @@ -2121,23 +2246,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co sljit_emit_atomic_store. A thread can only perform a single atomic operation at a time. - Note: atomic operations are experimental, and not implemented - for all cpus. 
- The following conditions must be satisfied, or the operation is undefined: - the address provided in mem_reg must be divisible by the size of the value (only naturally aligned updates are supported) - - no memory writes are allowed between the load and store operations - regardless of its target address (currently read operations are - allowed, but this might change in the future) + - no memory operations are allowed between the load and store operations - the memory operation (op) and the base address (stored in mem_reg) passed to the load/store operations must be the same (the mem_reg can be a different register, only its value must be the same) - - an store must always follow a load for the same transaction. + - a store must always follow a load for the same transaction. - op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all - signed loads such as SLJIT_MOV32_S16 + op must be between SLJIT_MOV and SLJIT_MOV_P dst_reg is the register where the data will be loaded into mem_reg is the base address of the memory load (it cannot be SLJIT_SP or a virtual register on x86-32) @@ -2151,18 +2270,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler allows performing an atomic read-modify-write operation. See the description of sljit_emit_atomic_load. - op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all signed - loads such as SLJIT_MOV32_S16 + op must be between SLJIT_MOV and SLJIT_MOV_P src_reg is the register which value is stored into the memory mem_reg is the base address of the memory store (it cannot be SLJIT_SP or a virtual register on x86-32) - temp_reg is a not preserved scratch register, which must be - initialized with the value loaded into the dst_reg during the - corresponding sljit_emit_atomic_load operation, or the operation - is undefined - - Flags: ATOMIC_STORED is set if the operation is successful, - otherwise the memory remains unchanged. 
*/ + temp_reg is a scratch register, which must be initialized with + the value loaded into the dst_reg during the corresponding + sljit_emit_atomic_load operation, or the operation is undefined. + The temp_reg register preserves its value, if the memory store + is successful. Otherwise, its value is undefined. + + Flags: ATOMIC_STORED + if ATOMIC_STORED flag is set, it represents that the memory + is updated with a new value. Otherwise the memory is unchanged. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src_reg, sljit_s32 mem_reg, @@ -2457,10 +2577,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct it is sometimes desired to free all unused memory regions, e.g. before the application terminates. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); -#endif +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ #ifdef __cplusplus } /* extern "C" */ -#endif +#endif /* __cplusplus */ #endif /* SLJIT_LIR_H_ */ diff --git a/src/sljit/sljitNativeARM_32.c b/deps/sljit/sljit_src/sljitNativeARM_32.c similarity index 93% rename from src/sljit/sljitNativeARM_32.c rename to deps/sljit/sljit_src/sljitNativeARM_32.c index a253c06..327dc82 100644 --- a/src/sljit/sljitNativeARM_32.c +++ b/deps/sljit/sljit_src/sljitNativeARM_32.c @@ -114,6 +114,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define CLZ 0xe16f0f10 #define CMN 0xe1600000 #define CMP 0xe1400000 +#define DMB_SY 0xf57ff05f #define EOR 0xe0200000 #define LDR 0xe5100000 #define LDR_POST 0xe4100000 @@ -180,6 +181,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define VST1_s 0xf4800000 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 +#define VTBL 0xf3b00800 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Arm v7 specific instructions. 
*/ @@ -198,11 +200,28 @@ static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) fr -= SLJIT_F64_SECOND(0); - return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) - || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->real_fscratches)) + || (fr > (SLJIT_FS0 - compiler->real_fsaveds) && fr <= SLJIT_FS0) || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); } +static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type) +{ + sljit_s32 vr_low = vr; + + if (compiler->scratches == -1) + return 0; + + if (SLJIT_SIMD_GET_REG_SIZE(type) == 4) { + vr += (vr & 0x1); + vr_low = vr - 1; + } + + return (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches)) + || (vr_low > (SLJIT_VS0 - compiler->vsaveds) && vr_low <= SLJIT_VS0) + || (vr >= SLJIT_TMP_VREGISTER_BASE && vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS)); +} + #endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) @@ -364,7 +383,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ while (last_pc_patch < code_ptr) { /* Data transfer instruction with Rn == r15. 
*/ - if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) { + if ((*last_pc_patch & 0x0e4f0000) == 0x040f0000) { diff = (sljit_uw)(const_pool - last_pc_patch); ind = (*last_pc_patch) & 0xfff; @@ -476,6 +495,14 @@ static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s3 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset) { sljit_sw diff; + sljit_uw target_addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; + sljit_uw orig_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + jump->addr = jump_addr; +#endif if (jump->flags & SLJIT_REWRITABLE_JUMP) return 0; @@ -486,12 +513,17 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw #endif /* SLJIT_CONFIG_ARM_V6 */ if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); + target_addr = jump->u.target; else { SLJIT_ASSERT(jump->u.label != NULL); - diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); + target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (jump->u.label->size > orig_addr) + jump_addr = (sljit_uw)(code + orig_addr); } + diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr + 8, executable_offset); + /* Branch to Thumb code has not been optimized yet. 
*/ if (diff & 0x3) return 0; @@ -503,12 +535,9 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw jump->flags |= PATCH_B; return 1; } - } - else { - if (diff <= 0x01ffffff && diff >= -0x02000000) { - *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); - jump->flags |= PATCH_B; - } + } else if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); + jump->flags |= PATCH_B; } #else /* !SLJIT_CONFIG_ARM_V6 */ if (diff <= 0x01ffffff && diff >= -0x02000000) { @@ -714,16 +743,21 @@ static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_uw addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; sljit_sw diff; SLJIT_UNUSED_ARG(executable_offset); if (jump->flags & JUMP_ADDR) addr = jump->u.target; - else + else { addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + if (jump->u.label->size > jump->addr) + jump_addr = (sljit_uw)(code + jump->addr); + } + /* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. */ - diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) { jump->flags |= PATCH_B; @@ -784,6 +818,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) { /* Unit size: instruction. 
*/ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins))) total_size = 1 - 1; @@ -796,6 +834,11 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & JUMP_ADDR)) { diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } + if (diff <= 0xff + 2 && diff >= -0xff + 2) total_size = 0; } @@ -917,7 +960,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump->addr = (sljit_uw)code_ptr; #else /* !SLJIT_CONFIG_ARM_V6 */ word_count += jump->flags >> JUMP_SIZE_SHIFT; - jump->addr = (sljit_uw)code_ptr; if (!detect_jump_type(jump, code_ptr, code, executable_offset)) { code_ptr[2] = code_ptr[0]; addr = ((code_ptr[0] & 0xf) << 12); @@ -1131,6 +1173,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + case SLJIT_HAS_MEMORY_BARRIER: +#endif /* SLJIT_CONFIG_ARM_V7 */ return 1; case SLJIT_HAS_CTZ: @@ -1225,9 +1270,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 src2, sljit_sw src2w); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_uw imm, offset; sljit_s32 i, tmp, size, word_arg_count; sljit_s32 
saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); @@ -1240,11 +1287,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); - imm = 0; + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + imm = 0; tmp = SLJIT_S0 - saveds; for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) imm |= (sljit_uw)1 << reg_map[i]; @@ -1391,15 +1442,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 size; CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); /* 
Doubles are saved, so alignment is unaffected. */ @@ -2364,6 +2421,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; + case SLJIT_MEMORY_BARRIER: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return push_inst(compiler, DMB_SY); +#else /* !SLJIT_CONFIG_ARM_V7 */ + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_ARM_V7 */ case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; @@ -2630,7 +2693,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, slji if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64) return freg_map[reg]; - if (type != SLJIT_SIMD_REG_128) + if (type == SLJIT_SIMD_REG_128) return freg_map[reg] & ~0x1; return -1; @@ -3105,9 +3168,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type >= SLJIT_FAST_CALL) PTR_FAIL_IF(prepare_blx(compiler)); - jump->addr = compiler->size; PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); + jump->addr = compiler->size - 1; if (jump->flags & SLJIT_REWRITABLE_JUMP) compiler->patches++; @@ -3907,7 +3970,7 @@ static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg) #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3916,7 +3979,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3930,16 +3993,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (!(srcdst & SLJIT_MEM)) { if (reg_size == 4) srcdst = simd_get_quad_reg_index(srcdst); if (type & SLJIT_SIMD_STORE) - ins = VD(srcdst) | VN(freg) | VM(freg); + ins = VD(srcdst) | VN(vreg) | VM(vreg); else - ins = VD(freg) | VN(srcdst) | VM(srcdst); + ins = VD(vreg) | VN(srcdst) | VM(srcdst); if (reg_size == 4) ins |= (sljit_ins)1 << 6; @@ -3952,7 +4015,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co if (elem_size > 3) elem_size = 3; - ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg) + ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(vreg) | (sljit_ins)((reg_size == 3) ? 
(0x7 << 8) : (0xa << 8)); SLJIT_ASSERT(reg_size >= alignment); @@ -4060,7 +4123,7 @@ static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4068,7 +4131,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil sljit_ins ins, imm; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -4082,24 +4145,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (src == SLJIT_IMM && srcw == 0) - return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg)); + return push_inst(compiler, VMOV_i | ((reg_size == 4) ? 
(1 << 6) : 0) | VD(vreg)); if (SLJIT_UNLIKELY(elem_size == 3)) { SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT); if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw)); - src = freg; - } else if (freg != src) - FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src))); + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, vreg, src, srcw)); + src = vreg; + } else if (vreg != src) + FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src))); - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); - if (freg != src) - return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)); + if (vreg != src) + return push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src)); return SLJIT_SUCCESS; } @@ -4111,7 +4174,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) ins |= (sljit_ins)1 << 5; - return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf); + return push_inst(compiler, VLD1_r | ins | VD(vreg) | RN(src) | 0xf); } if (type & SLJIT_SIMD_FLOAT) { @@ -4121,7 +4184,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) ins |= (sljit_ins)1 << 6; - return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]); + return push_inst(compiler, VDUP_s | ins | VD(vreg) | (sljit_ins)freg_map[src]); } if (src == SLJIT_IMM) { @@ -4134,7 +4197,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) imm |= (sljit_ins)1 << 6; - return push_inst(compiler, VMOV_i | imm | VD(freg)); + return push_inst(compiler, VMOV_i | imm | VD(vreg)); } FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); @@ -4156,11 +4219,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) ins |= (sljit_ins)1 << 21; - return push_inst(compiler, VDUP | ins | VN(freg) | RD(src)); + return push_inst(compiler, VDUP | 
ins | VN(vreg) | RD(src)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4168,7 +4231,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -4182,7 +4245,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (type & SLJIT_SIMD_LANE_ZERO) { ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6); @@ -4190,62 +4253,62 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (type & SLJIT_SIMD_FLOAT) { if (elem_size == 3 && !(srcdst & SLJIT_MEM)) { if (lane_index == 1) - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); - if (srcdst != freg) - FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst))); + if (srcdst != vreg) + FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(srcdst) | VM(srcdst))); - freg += SLJIT_QUAD_OTHER_HALF(freg); - return push_inst(compiler, VMOV_i | VD(freg)); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + return push_inst(compiler, VMOV_i | VD(vreg)); } - if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) { - FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg))); + if (srcdst == vreg || (elem_size == 3 && srcdst == (vreg + SLJIT_QUAD_OTHER_HALF(vreg)))) { + FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(vreg) | VM(vreg))); srcdst = TMP_FREG2; 
srcdstw = 0; } } - FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg))); + FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(vreg))); } if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) { lane_index -= (0x8 >> elem_size); - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); } if (srcdst & SLJIT_MEM) { if (elem_size == 3) - return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw); + return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, vreg, srcdst, srcdstw); FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); lane_index = lane_index << elem_size; ins = (sljit_ins)((elem_size << 10) | (lane_index << 5)); - return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf); + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(vreg) | RN(srcdst) | 0xf); } if (type & SLJIT_SIMD_FLOAT) { if (elem_size == 3) { if (type & SLJIT_SIMD_STORE) - return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg)); - return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst)); + return push_inst(compiler, VORR | VD(srcdst) | VN(vreg) | VM(vreg)); + return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(vreg) | VM(srcdst)); } if (type & SLJIT_SIMD_STORE) { - if (freg_ebit_map[freg] == 0) { + if (freg_ebit_map[vreg] == 0) { if (lane_index == 1) - freg = SLJIT_F64_SECOND(freg); + vreg = SLJIT_F64_SECOND(vreg); - return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg)); + return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(vreg)); } - FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1))); + FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(vreg) | RD(TMP_REG1))); return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1)); } FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | 
RD(TMP_REG1))); - return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)); + return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(vreg) | RD(TMP_REG1)); } if (srcdst == SLJIT_IMM) { @@ -4273,11 +4336,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile ins |= (1 << 23); } - return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst)); + return push_inst(compiler, VMOV_s | ins | VN(vreg) | RD(srcdst)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4285,7 +4348,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); if (reg_size != 3 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -4297,7 +4360,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c return SLJIT_SUCCESS; if (reg_size == 4) { - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); src = simd_get_quad_reg_index(src); if (src_lane_index >= (0x8 >> elem_size)) { @@ -4307,13 +4370,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c } if (elem_size == 3) { - if (freg != src) - FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src))); + if (vreg != src) + FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src))); - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); - if (freg != src) - return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)); + if (vreg != src) + return push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src)); return 
SLJIT_SUCCESS; } @@ -4322,11 +4385,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (reg_size == 4) ins |= (sljit_ins)1 << 6; - return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src)); + return push_inst(compiler, VDUP_s | ins | VD(vreg) | VM(src)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4335,7 +4398,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler sljit_s32 dst_reg; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -4349,20 +4412,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (src & SLJIT_MEM) { FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); if (reg_size == 4 && elem2_size - elem_size == 1) - FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf)); + FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(vreg) | RN(src) | 0xf)); else - FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf)); - src = freg; + FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(vreg) | RN(src) | 0xf)); + src = vreg; } else if (reg_size == 4) src = simd_get_quad_reg_index(src); if (!(type & SLJIT_SIMD_FLOAT)) { - dst_reg = (reg_size == 4) ? freg : TMP_FREG2; + dst_reg = (reg_size == 4) ? vreg : TMP_FREG2; do { FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 
0 : (1 << 24)) @@ -4371,27 +4434,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler } while (++elem_size < elem2_size); if (dst_reg == TMP_FREG2) - return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2)); + return push_inst(compiler, VORR | VD(vreg) | VN(TMP_FREG2) | VM(TMP_FREG2)); return SLJIT_SUCCESS; } /* No SIMD variant, must use VFP instead. */ SLJIT_ASSERT(reg_size == 4); - if (freg == src) { - freg += SLJIT_QUAD_OTHER_HALF(freg); - FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20)); - freg += SLJIT_QUAD_OTHER_HALF(freg); - return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)); + if (vreg == src) { + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src) | 0x20)); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + return push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src)); } - FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src))); - freg += SLJIT_QUAD_OTHER_HALF(freg); - return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20); + FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src))); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + return push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src) | 0x20); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4400,7 +4463,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c sljit_s32 dst_r; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); @@ -4433,12 +4496,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } if (reg_size == 4) { - freg = simd_get_quad_reg_index(freg); + vreg = 
simd_get_quad_reg_index(vreg); ins |= (sljit_ins)1 << 6; } SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0); - FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg))); + FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(vreg))); if (reg_size == 4 && elem_size > 0) FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2))); @@ -4468,14 +4531,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); - sljit_ins ins = 0; + sljit_s32 alignment; + sljit_ins ins = 0, load_ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); + ADJUST_LOCAL_OFFSET(src2, src2w); if (reg_size != 3 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -4483,6 +4548,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) return SLJIT_ERR_UNSUPPORTED; + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + switch (SLJIT_SIMD_GET_OPCODE(type)) { case SLJIT_SIMD_OP2_AND: ins = VAND; @@ -4493,19 +4561,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co case SLJIT_SIMD_OP2_XOR: ins = VEOR; break; + case SLJIT_SIMD_OP2_SHUFFLE: + ins = VTBL; + break; } - if (type & SLJIT_SIMD_TEST) - return SLJIT_SUCCESS; + if (src2 & SLJIT_MEM) { + if (elem_size > 3) + elem_size = 3; + + load_ins = VLD1 | (sljit_ins)((reg_size == 3) ? 
(0x7 << 8) : (0xa << 8)); + alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + + SLJIT_ASSERT(reg_size >= alignment); + + if (alignment == 3) + load_ins |= 0x10; + else if (alignment >= 4) + load_ins |= 0x20; + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w)); + FAIL_IF(push_inst(compiler, load_ins | VD(TMP_FREG2) | RN(src2) | ((sljit_ins)elem_size) << 6 | 0xf)); + src2 = TMP_FREG2; + } if (reg_size == 4) { - dst_freg = simd_get_quad_reg_index(dst_freg); - src1_freg = simd_get_quad_reg_index(src1_freg); - src2_freg = simd_get_quad_reg_index(src2_freg); + dst_vreg = simd_get_quad_reg_index(dst_vreg); + src1_vreg = simd_get_quad_reg_index(src1_vreg); + src2 = simd_get_quad_reg_index(src2); + + if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) { + ins |= (sljit_ins)1 << 8; + + FAIL_IF(push_inst(compiler, ins | VD(dst_vreg != src1_vreg ? dst_vreg : TMP_FREG2) | VN(src1_vreg) | VM(src2))); + src2 += SLJIT_QUAD_OTHER_HALF(src2); + FAIL_IF(push_inst(compiler, ins | VD(dst_vreg + SLJIT_QUAD_OTHER_HALF(dst_vreg)) | VN(src1_vreg) | VM(src2))); + + if (dst_vreg == src1_vreg) + return push_inst(compiler, VORR | VD(dst_vreg) | VN(TMP_FREG2) | VM(TMP_FREG2)); + return SLJIT_SUCCESS; + } + ins |= (sljit_ins)1 << 6; } - return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg)); + return push_inst(compiler, ins | VD(dst_vreg) | VN(src1_vreg) | VM(src2)); } #undef FPU_LOAD @@ -4519,7 +4619,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + case SLJIT_MOV_S16: + case SLJIT_MOV_S32: + return SLJIT_ERR_UNSUPPORTED; + case SLJIT_MOV_U8: ins = LDREXB; break; @@ -4531,6 +4639,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler break; } + if (op & SLJIT_ATOMIC_TEST) + return 
SLJIT_SUCCESS; + return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg)); } @@ -4547,7 +4658,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + case SLJIT_MOV_S16: + case SLJIT_MOV_S32: + return SLJIT_ERR_UNSUPPORTED; + case SLJIT_MOV_U8: ins = STREXB; break; @@ -4559,6 +4678,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler break; } + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg))); if (op & SLJIT_SET_ATOMIC_STORED) return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1)); diff --git a/src/sljit/sljitNativeARM_64.c b/deps/sljit/sljit_src/sljitNativeARM_64.c similarity index 94% rename from src/sljit/sljitNativeARM_64.c rename to deps/sljit/sljit_src/sljitNativeARM_64.c index 5331ebd..d80c9e5 100644 --- a/src/sljit/sljitNativeARM_64.c +++ b/deps/sljit/sljit_src/sljitNativeARM_64.c @@ -91,6 +91,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define CLZ 0xdac01000 #define CSEL 0x9a800000 #define CSINC 0x9a800400 +#define DMB_SY 0xd5033fbf #define DUP_e 0x0e000400 #define DUP_g 0x0e000c00 #define EOR 0xca000000 @@ -171,6 +172,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SUBI 0xd1000000 #define SUBS 0xeb000000 #define TBZ 0x36000000 +#define TBL_v 0x0e000000 #define UBFM 0xd3400000 #define UCVTF 0x9e630000 #define UDIV 0x9ac00800 @@ -208,7 +210,11 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i { sljit_sw diff; sljit_uw target_addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; + sljit_uw orig_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + jump->addr = jump_addr; if (jump->flags 
& SLJIT_REWRITABLE_JUMP) goto exit; @@ -216,10 +222,13 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = jump->u.target; else { SLJIT_ASSERT(jump->u.label != NULL); - target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (jump->u.label->size > orig_addr) + jump_addr = (sljit_uw)(code + orig_addr); } - diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (jump->flags & IS_COND) { diff += SSIZE_OF(ins); @@ -271,16 +280,21 @@ exit: static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_uw addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; sljit_sw diff; SLJIT_UNUSED_ARG(executable_offset); SLJIT_ASSERT(jump->flags < ((sljit_uw)4 << JUMP_SIZE_SHIFT)); if (jump->flags & JUMP_ADDR) addr = jump->u.target; - else + else { addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); - diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (jump->u.label->size > jump->addr) + jump_addr = (sljit_uw)(code + jump->addr); + } + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (diff <= 0xfffff && diff >= -0x100000) { jump->flags |= PATCH_B; @@ -422,6 +436,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) } else { /* Unit size: instruction. 
*/ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if ((jump->flags & IS_COND) && (diff + 1) <= (0xfffff / SSIZE_OF(ins)) && (diff + 1) >= (-0x100000 / SSIZE_OF(ins))) total_size = 0; @@ -439,6 +457,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & JUMP_ADDR)) { diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (diff <= (0xfffff / SSIZE_OF(ins)) && diff >= (-0x100000 / SSIZE_OF(ins))) total_size = 0; @@ -516,7 +538,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (next_min_addr == next_jump_addr) { if (!(jump->flags & JUMP_MOV_ADDR)) { word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); - jump->addr = (sljit_uw)code_ptr; code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); SLJIT_ASSERT((jump->flags & PATCH_COND) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); } else { @@ -593,6 +614,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; default: @@ -1208,16 +1230,23 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s /* --------------------------------------------------------------------- */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + 
sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 prev, fprev, saved_regs_size, i, tmp; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_ins offs; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2); saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); @@ -1383,15 +1412,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 saved_regs_size; CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; saved_regs_size = 
GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2); saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); @@ -1537,7 +1572,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile op = GET_OPCODE(op); switch (op) { case SLJIT_BREAKPOINT: - return push_inst(compiler, BRK); + return push_inst(compiler, BRK | (0xf000 << 5)); case SLJIT_NOP: return push_inst(compiler, NOP); case SLJIT_LMUL_UW: @@ -1554,6 +1589,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_DIV_UW: case SLJIT_DIV_SW: return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, DMB_SY); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; @@ -2775,7 +2812,7 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -2783,7 +2820,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -2798,9 +2835,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co if (!(srcdst & SLJIT_MEM)) { if (type & SLJIT_SIMD_STORE) - ins = VD(srcdst) | VN(freg) | VM(freg); + ins = VD(srcdst) | VN(vreg) | VM(vreg); else - ins = VD(freg) | VN(srcdst) | VM(srcdst); + ins = VD(vreg) | VN(srcdst) | VM(srcdst); if (reg_size == 4) ins |= (1 << 30); @@ -2818,7 +2855,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct 
sljit_compiler *co if (reg_size == 4) ins |= (1 << 30); - return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg)); + return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(vreg)); } static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) @@ -2923,7 +2960,7 @@ static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -2931,7 +2968,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil sljit_ins ins, imm; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -2952,7 +2989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) ins |= (sljit_ins)1 << 30; - return push_inst(compiler, LD1R | ins | RN(src) | VT(freg)); + return push_inst(compiler, LD1R | ins | RN(src) | VT(vreg)); } ins = (sljit_ins)1 << (16 + elem_size); @@ -2962,9 +2999,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (type & SLJIT_SIMD_FLOAT) { if (src == SLJIT_IMM) - return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg)); + return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(vreg)); - return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); + return push_inst(compiler, DUP_e | ins | VD(vreg) | VN(src)); } if (src == SLJIT_IMM) { @@ -2976,18 +3013,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (imm != ~(sljit_ins)0) { imm |= ins & ((sljit_ins)1 << 30); - return push_inst(compiler, MOVI | imm | VD(freg)); + return push_inst(compiler, MOVI | imm | 
VD(vreg)); } FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); src = TMP_REG2; } - return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src)); + return push_inst(compiler, DUP_g | ins | VD(vreg) | RN(src)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -2995,7 +3032,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3011,13 +3048,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (type & SLJIT_SIMD_LANE_ZERO) { ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30); - if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { - FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg))); + if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) { + FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(vreg) | VM(vreg))); srcdst = TMP_FREG1; srcdstw = 0; } - FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg))); + FAIL_IF(push_inst(compiler, MOVI | ins | VD(vreg))); } if (srcdst & SLJIT_MEM) { @@ -3033,14 +3070,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile lane_index = lane_index << elem_size; ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10)); - return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg)); + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 
ST1_s : LD1_s) | ins | RN(srcdst) | VT(vreg)); } if (type & SLJIT_SIMD_FLOAT) { if (type & SLJIT_SIMD_STORE) - ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg); + ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(vreg); else - ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst); + ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(vreg) | VN(srcdst); return push_inst(compiler, ins); } @@ -3054,7 +3091,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } if (type & SLJIT_SIMD_STORE) { - ins = RD(srcdst) | VN(freg); + ins = RD(srcdst) | VN(vreg); if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) { ins |= SMOV; @@ -3064,7 +3101,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } else ins |= UMOV; } else - ins = INS | VD(freg) | RN(srcdst); + ins = INS | VD(vreg) | RN(srcdst); if (elem_size == 3) ins |= (sljit_ins)1 << 30; @@ -3073,7 +3110,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3081,7 +3118,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); if (reg_size != 3 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -3097,11 +3134,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (reg_size 
== 4) ins |= (sljit_ins)1 << 30; - return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); + return push_inst(compiler, DUP_e | ins | VD(vreg) | VN(src)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3109,7 +3146,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); CHECK_ERROR(); - CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -3126,28 +3163,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); if (reg_size == 4 && elem2_size - elem_size == 1) - FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg))); + FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(vreg))); else - FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg))); - src = freg; + FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(vreg))); + src = vreg; } if (type & SLJIT_SIMD_FLOAT) { SLJIT_ASSERT(reg_size == 4); - return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src)); + return push_inst(compiler, FCVTL | (1 << 22) | VD(vreg) | VN(src)); } do { FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 
SSHLL : USHLL) - | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src))); - src = freg; + | ((sljit_ins)1 << (19 + elem_size)) | VD(vreg) | VN(src))); + src = vreg; } while (++elem_size < elem2_size); return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3156,7 +3193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c sljit_s32 dst_r; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); @@ -3191,7 +3228,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c if (reg_size == 4) ins |= (1 << 30); - FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg))); + FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(vreg))); if (reg_size == 4 && elem_size > 0) FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1))); @@ -3224,14 +3261,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); sljit_ins ins = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); + ADJUST_LOCAL_OFFSET(src2, src2w); if (reg_size != 3 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -3239,6 +3277,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_simd_op2(struct sljit_compiler *co if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) return SLJIT_ERR_UNSUPPORTED; + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + switch (SLJIT_SIMD_GET_OPCODE(type)) { case SLJIT_SIMD_OP2_AND: ins = AND_v; @@ -3249,15 +3290,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co case SLJIT_SIMD_OP2_XOR: ins = EOR_v; break; + case SLJIT_SIMD_OP2_SHUFFLE: + ins = TBL_v; + break; } - if (type & SLJIT_SIMD_TEST) - return SLJIT_SUCCESS; + if (src2 & SLJIT_MEM) { + if (elem_size > 3) + elem_size = 3; + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w)); + FAIL_IF(push_inst(compiler, LD1 | (reg_size == 4 ? (1 << 30) : 0) | ((sljit_ins)elem_size << 10) | RN(src2) | VT(TMP_FREG1))); + src2 = TMP_FREG1; + } if (reg_size == 4) ins |= (sljit_ins)1 << 30; - return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg)); + return push_inst(compiler, ins | VD(dst_vreg) | VN(src1_vreg) | VM(src2)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, @@ -3269,39 +3319,55 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); -#ifdef __ARM_FEATURE_ATOMICS - switch (GET_OPCODE(op)) { - case SLJIT_MOV32: - case SLJIT_MOV_U32: - ins = LDR ^ (1 << 30); - break; - case SLJIT_MOV_U16: - ins = LDRH; - break; - case SLJIT_MOV_U8: - ins = LDRB; - break; - default: - ins = LDR; - break; - } -#else /* !__ARM_FEATURE_ATOMICS */ +#ifndef __ARM_FEATURE_ATOMICS + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; +#endif /* ARM_FEATURE_ATOMICS */ + switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + case SLJIT_MOV_S16: + case SLJIT_MOV_S32: + return SLJIT_ERR_UNSUPPORTED; + case SLJIT_MOV32: case SLJIT_MOV_U32: - ins = LDXR ^ (1 << 30); +#ifdef __ARM_FEATURE_ATOMICS + if (!(op & SLJIT_ATOMIC_USE_LS)) + ins = 
LDR ^ (1 << 30); + else +#endif /* ARM_FEATURE_ATOMICS */ + ins = LDXR ^ (1 << 30); break; case SLJIT_MOV_U8: - ins = LDXRB; +#ifdef __ARM_FEATURE_ATOMICS + if (!(op & SLJIT_ATOMIC_USE_LS)) + ins = LDRB; + else +#endif /* ARM_FEATURE_ATOMICS */ + ins = LDXRB; break; case SLJIT_MOV_U16: - ins = LDXRH; +#ifdef __ARM_FEATURE_ATOMICS + if (!(op & SLJIT_ATOMIC_USE_LS)) + ins = LDRH; + else +#endif /* ARM_FEATURE_ATOMICS */ + ins = LDXRH; break; default: - ins = LDXR; +#ifdef __ARM_FEATURE_ATOMICS + if (!(op & SLJIT_ATOMIC_USE_LS)) + ins = LDR; + else +#endif /* ARM_FEATURE_ATOMICS */ + ins = LDXR; break; } -#endif /* ARM_FEATURE_ATOMICS */ + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg)); } @@ -3311,55 +3377,65 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler sljit_s32 temp_reg) { sljit_ins ins; - sljit_s32 tmp = temp_reg; sljit_ins cmp = 0; - sljit_ins inv_bits = W_OP; CHECK_ERROR(); CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); #ifdef __ARM_FEATURE_ATOMICS - if (op & SLJIT_SET_ATOMIC_STORED) - cmp = (SUBS ^ W_OP) | RD(TMP_ZERO); + if (!(op & SLJIT_ATOMIC_USE_LS)) { + if (op & SLJIT_SET_ATOMIC_STORED) + cmp = (SUBS ^ W_OP) | RD(TMP_ZERO); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + case SLJIT_MOV_S16: + case SLJIT_MOV_S32: + return SLJIT_ERR_UNSUPPORTED; + + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = CAS ^ (1 << 30); + break; + case SLJIT_MOV_U16: + ins = CASH; + break; + case SLJIT_MOV_U8: + ins = CASB; + break; + default: + ins = CAS; + if (cmp) + cmp ^= W_OP; + break; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; - switch (GET_OPCODE(op)) { - case SLJIT_MOV32: - case SLJIT_MOV_U32: - ins = CAS ^ (1 << 30); - break; - case SLJIT_MOV_U16: - ins = CASH; - break; - case SLJIT_MOV_U8: - ins = CASB; - break; - default: - ins = CAS; - inv_bits = 0; if (cmp) - cmp ^= W_OP; - break; - } + 
FAIL_IF(push_inst(compiler, ((MOV ^ W_OP) ^ (cmp & W_OP)) | RM(temp_reg) | RD(TMP_REG2))); - if (cmp) { - FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1))); - tmp = TMP_REG1; - } - FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg))); - if (!cmp) - return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, ins | RM(temp_reg) | RN(mem_reg) | RD(src_reg))); + if (!cmp) + return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg))); - FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp))); - return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO)); + return push_inst(compiler, cmp | RM(TMP_REG2) | RN(temp_reg)); + } #else /* !__ARM_FEATURE_ATOMICS */ - SLJIT_UNUSED_ARG(tmp); - SLJIT_UNUSED_ARG(inv_bits); + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; +#endif /* __ARM_FEATURE_ATOMICS */ if (op & SLJIT_SET_ATOMIC_STORED) cmp = (SUBI ^ W_OP) | (1 << 29); switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + case SLJIT_MOV_S16: + case SLJIT_MOV_S32: + return SLJIT_ERR_UNSUPPORTED; + case SLJIT_MOV32: case SLJIT_MOV_U32: ins = STXR ^ (1 << 30); @@ -3375,9 +3451,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler break; } - FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg))); - return cmp ? 
push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS; -#endif /* __ARM_FEATURE_ATOMICS */ + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, ins | RM(TMP_REG2) | RN(mem_reg) | RT(src_reg))); + if (!cmp) + return SLJIT_SUCCESS; + return push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG2)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) diff --git a/src/sljit/sljitNativeARM_T2_32.c b/deps/sljit/sljit_src/sljitNativeARM_T2_32.c similarity index 94% rename from src/sljit/sljitNativeARM_T2_32.c rename to deps/sljit/sljit_src/sljitNativeARM_T2_32.c index 799954a..d8058ab 100644 --- a/src/sljit/sljitNativeARM_T2_32.c +++ b/deps/sljit/sljit_src/sljitNativeARM_T2_32.c @@ -138,6 +138,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define CMPI_W 0xf1b00f00 #define CMP_X 0x4500 #define CMP_W 0xebb00f00 +#define DMB_SY 0xf3bf8f5f #define EORI 0xf0800000 #define EORS 0x4040 #define EOR_W 0xea800000 @@ -253,6 +254,7 @@ static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) #define VST1_s 0xf9800000 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 +#define VTBL 0xffb00800 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -264,11 +266,28 @@ static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) fr -= SLJIT_F64_SECOND(0); - return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) - || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->real_fscratches)) + || (fr > (SLJIT_FS0 - compiler->real_fsaveds) && fr <= SLJIT_FS0) || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); } +static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, 
sljit_s32 vr, sljit_s32 type) +{ + sljit_s32 vr_low = vr; + + if (compiler->scratches == -1) + return 0; + + if (SLJIT_SIMD_GET_REG_SIZE(type) == 4) { + vr += (vr & 0x1); + vr_low = vr - 1; + } + + return (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches)) + || (vr_low > (SLJIT_VS0 - compiler->vsaveds) && vr_low <= SLJIT_VS0) + || (vr >= SLJIT_TMP_VREGISTER_BASE && vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS)); +} + #endif /* SLJIT_ARGUMENT_CHECKS */ static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) @@ -320,7 +339,12 @@ static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) { sljit_sw diff; + sljit_uw target_addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; + sljit_uw orig_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + jump->addr = jump_addr; if (jump->flags & SLJIT_REWRITABLE_JUMP) goto exit; @@ -328,12 +352,17 @@ static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u /* Branch to ARM code is not optimized yet. */ if (!(jump->u.target & 0x1)) goto exit; - diff = (sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset; + target_addr = jump->u.target; } else { SLJIT_ASSERT(jump->u.label != NULL); - diff = (sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2); + target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (jump->u.label->size > orig_addr) + jump_addr = (sljit_uw)(code + orig_addr); } + diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr + 4, executable_offset); + if (jump->flags & IS_COND) { SLJIT_ASSERT(!(jump->flags & IS_BL)); /* Size of the prefix IT instruction. 
*/ @@ -380,16 +409,21 @@ exit: static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) { sljit_uw addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; sljit_sw diff; SLJIT_UNUSED_ARG(executable_offset); if (jump->flags & JUMP_ADDR) addr = jump->u.target; - else + else { addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + if (jump->u.label->size > jump->addr) + jump_addr = (sljit_uw)(code + jump->addr); + } + /* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */ - diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); /* Note: ADR with imm8 does not set the last bit (Thumb2 flag). */ @@ -517,6 +551,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) { /* Unit size: instruction. 
*/ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (jump->flags & IS_COND) { diff++; @@ -540,6 +578,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & JUMP_ADDR)) { diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16))) total_size = 1; @@ -612,7 +654,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (next_min_addr == next_jump_addr) { if (!(jump->flags & JUMP_MOV_ADDR)) { half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); - jump->addr = (sljit_uw)code_ptr; code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr < ((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16)); @@ -694,6 +735,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; default: @@ -1367,9 +1409,11 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit /* --------------------------------------------------------------------- */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 size, i, tmp, word_arg_count; sljit_s32 
saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_uw offset; @@ -1383,8 +1427,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; tmp = SLJIT_S0 - saveds; for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) @@ -1577,15 +1626,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 size; CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); /* Doubles are saved, so alignment is 
unaffected. */ @@ -1904,6 +1959,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile } return SLJIT_SUCCESS; #endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */ + case SLJIT_MEMORY_BARRIER: + return push_inst32(compiler, DMB_SY); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; @@ -2204,7 +2261,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, slji if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64) return freg_map[reg]; - if (type != SLJIT_SIMD_REG_128) + if (type == SLJIT_SIMD_REG_128) return freg_map[reg] & ~0x1; return -1; @@ -3582,7 +3639,7 @@ static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg) #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3591,7 +3648,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3605,16 +3662,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (!(srcdst & SLJIT_MEM)) { if (reg_size == 4) srcdst = simd_get_quad_reg_index(srcdst); if (type & SLJIT_SIMD_STORE) - ins = VD4(srcdst) | VN4(freg) | VM4(freg); + ins = VD4(srcdst) | VN4(vreg) | VM4(vreg); else - ins = VD4(freg) | VN4(srcdst) | VM4(srcdst); + ins = VD4(vreg) | VN4(srcdst) | VM4(srcdst); if (reg_size == 4) ins |= (sljit_ins)1 << 6; @@ -3627,7 +3684,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_simd_mov(struct sljit_compiler *co if (elem_size > 3) elem_size = 3; - ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg) + ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(vreg) | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8)); SLJIT_ASSERT(reg_size >= alignment); @@ -3735,7 +3792,7 @@ static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3743,7 +3800,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil sljit_ins ins, imm; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -3757,24 +3814,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (src == SLJIT_IMM && srcw == 0) - return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg)); + return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? 
(1 << 6) : 0) | VD4(vreg)); if (SLJIT_UNLIKELY(elem_size == 3)) { SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT); if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw)); - src = freg; - } else if (freg != src) - FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src))); + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, vreg, src, srcw)); + src = vreg; + } else if (vreg != src) + FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src))); - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); - if (freg != src) - return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)); + if (vreg != src) + return push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src)); return SLJIT_SUCCESS; } @@ -3786,7 +3843,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) ins |= 1 << 5; - return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf); + return push_inst32(compiler, VLD1_r | ins | VD4(vreg) | RN4(src) | 0xf); } if (type & SLJIT_SIMD_FLOAT) { @@ -3796,7 +3853,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) ins |= (sljit_ins)1 << 6; - return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]); + return push_inst32(compiler, VDUP_s | ins | VD4(vreg) | (sljit_ins)freg_map[src]); } if (src == SLJIT_IMM) { @@ -3809,7 +3866,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) imm |= (sljit_ins)1 << 6; - return push_inst32(compiler, VMOV_i | imm | VD4(freg)); + return push_inst32(compiler, VMOV_i | imm | VD4(vreg)); } FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); @@ -3831,11 +3888,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 4) ins |= (sljit_ins)1 << 21; - return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src)); + 
return push_inst32(compiler, VDUP | ins | VN4(vreg) | RT4(src)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3843,7 +3900,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3857,7 +3914,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (type & SLJIT_SIMD_LANE_ZERO) { ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6); @@ -3865,62 +3922,62 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (type & SLJIT_SIMD_FLOAT) { if (elem_size == 3 && !(srcdst & SLJIT_MEM)) { if (lane_index == 1) - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); - if (srcdst != freg) - FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst))); + if (srcdst != vreg) + FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(srcdst) | VM4(srcdst))); - freg += SLJIT_QUAD_OTHER_HALF(freg); - return push_inst32(compiler, VMOV_i | VD4(freg)); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + return push_inst32(compiler, VMOV_i | VD4(vreg)); } - if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) { - FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg))); + if (srcdst == vreg || (elem_size == 3 && srcdst == (vreg + SLJIT_QUAD_OTHER_HALF(vreg)))) { + FAIL_IF(push_inst32(compiler, VORR | ins | 
VD4(TMP_FREG2) | VN4(vreg) | VM4(vreg))); srcdst = TMP_FREG2; srcdstw = 0; } } - FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg))); + FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(vreg))); } if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) { lane_index -= (0x8 >> elem_size); - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); } if (srcdst & SLJIT_MEM) { if (elem_size == 3) - return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw); + return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, vreg, srcdst, srcdstw); FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); lane_index = lane_index << elem_size; ins = (sljit_ins)((elem_size << 10) | (lane_index << 5)); - return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf); + return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(vreg) | RN4(srcdst) | 0xf); } if (type & SLJIT_SIMD_FLOAT) { if (elem_size == 3) { if (type & SLJIT_SIMD_STORE) - return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg)); - return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst)); + return push_inst32(compiler, VORR | VD4(srcdst) | VN4(vreg) | VM4(vreg)); + return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(vreg) | VM4(srcdst)); } if (type & SLJIT_SIMD_STORE) { - if (freg_ebit_map[freg] == 0) { + if (freg_ebit_map[vreg] == 0) { if (lane_index == 1) - freg = SLJIT_F64_SECOND(freg); + vreg = SLJIT_F64_SECOND(vreg); - return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg)); + return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(vreg)); } - FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1))); + FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(vreg) | RT4(TMP_REG1))); return 
push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1)); } FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1))); - return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)); + return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(vreg) | RT4(TMP_REG1)); } if (srcdst == SLJIT_IMM) { @@ -3948,11 +4005,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile ins |= (1 << 23); } - return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst)); + return push_inst32(compiler, VMOV_s | ins | VN4(vreg) | RT4(srcdst)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3960,7 +4017,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); if (reg_size != 3 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -3972,7 +4029,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c return SLJIT_SUCCESS; if (reg_size == 4) { - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); src = simd_get_quad_reg_index(src); if (src_lane_index >= (0x8 >> elem_size)) { @@ -3982,13 +4039,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c } if (elem_size == 3) { - if (freg != src) - FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src))); + if (vreg != src) + FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src))); - freg += SLJIT_QUAD_OTHER_HALF(freg); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); - if (freg != src) - return 
push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)); + if (vreg != src) + return push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src)); return SLJIT_SUCCESS; } @@ -3997,11 +4054,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (reg_size == 4) ins |= (sljit_ins)1 << 6; - return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src)); + return push_inst32(compiler, VDUP_s | ins | VD4(vreg) | VM4(src)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4010,7 +4067,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler sljit_s32 dst_reg; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -4024,20 +4081,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler return SLJIT_SUCCESS; if (reg_size == 4) - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); if (src & SLJIT_MEM) { FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); if (reg_size == 4 && elem2_size - elem_size == 1) - FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf)); + FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(vreg) | RN4(src) | 0xf)); else - FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(freg) | RN4(src) | 0xf)); - src = freg; + FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(vreg) | RN4(src) | 0xf)); + src = vreg; } else if (reg_size == 4) src = simd_get_quad_reg_index(src); if (!(type & SLJIT_SIMD_FLOAT)) { - dst_reg = (reg_size == 4) ? freg : TMP_FREG2; + dst_reg = (reg_size == 4) ? 
vreg : TMP_FREG2; do { FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28)) @@ -4046,27 +4103,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler } while (++elem_size < elem2_size); if (dst_reg == TMP_FREG2) - return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2)); + return push_inst32(compiler, VORR | VD4(vreg) | VN4(TMP_FREG2) | VM4(TMP_FREG2)); return SLJIT_SUCCESS; } /* No SIMD variant, must use VFP instead. */ SLJIT_ASSERT(reg_size == 4); - if (freg == src) { - freg += SLJIT_QUAD_OTHER_HALF(freg); - FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20)); - freg += SLJIT_QUAD_OTHER_HALF(freg); - return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)); + if (vreg == src) { + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src) | 0x20)); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + return push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src)); } - FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src))); - freg += SLJIT_QUAD_OTHER_HALF(freg); - return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20); + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src))); + vreg += SLJIT_QUAD_OTHER_HALF(vreg); + return push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src) | 0x20); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4075,7 +4132,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c sljit_s32 dst_r; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); @@ -4108,12 +4165,12 @@ SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } if (reg_size == 4) { - freg = simd_get_quad_reg_index(freg); + vreg = simd_get_quad_reg_index(vreg); ins |= (sljit_ins)1 << 6; } SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0); - FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg))); + FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(vreg))); if (reg_size == 4 && elem_size > 0) FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2))); @@ -4143,14 +4200,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); - sljit_ins ins = 0; + sljit_s32 alignment; + sljit_ins ins = 0, load_ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); + ADJUST_LOCAL_OFFSET(src2, src2w); if (reg_size != 3 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -4158,6 +4217,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) return SLJIT_ERR_UNSUPPORTED; + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + switch (SLJIT_SIMD_GET_OPCODE(type)) { case SLJIT_SIMD_OP2_AND: ins = VAND; @@ -4168,19 +4230,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co case SLJIT_SIMD_OP2_XOR: ins = VEOR; break; + case SLJIT_SIMD_OP2_SHUFFLE: + ins = VTBL; + break; } - if (type & SLJIT_SIMD_TEST) - return SLJIT_SUCCESS; + if (src2 & SLJIT_MEM) { + if (elem_size > 3) + elem_size = 3; + + 
load_ins = VLD1 | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8)); + alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + + SLJIT_ASSERT(reg_size >= alignment); + + if (alignment == 3) + load_ins |= 0x10; + else if (alignment >= 4) + load_ins |= 0x20; + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w)); + FAIL_IF(push_inst32(compiler, load_ins | VD4(TMP_FREG2) | RN4(src2) | ((sljit_ins)elem_size) << 6 | 0xf)); + src2 = TMP_FREG2; + } if (reg_size == 4) { - dst_freg = simd_get_quad_reg_index(dst_freg); - src1_freg = simd_get_quad_reg_index(src1_freg); - src2_freg = simd_get_quad_reg_index(src2_freg); + dst_vreg = simd_get_quad_reg_index(dst_vreg); + src1_vreg = simd_get_quad_reg_index(src1_vreg); + src2 = simd_get_quad_reg_index(src2); + + if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) { + ins |= (sljit_ins)1 << 8; + + FAIL_IF(push_inst32(compiler, ins | VD4(dst_vreg != src1_vreg ? dst_vreg : TMP_FREG2) | VN4(src1_vreg) | VM4(src2))); + src2 += SLJIT_QUAD_OTHER_HALF(src2); + FAIL_IF(push_inst32(compiler, ins | VD4(dst_vreg + SLJIT_QUAD_OTHER_HALF(dst_vreg)) | VN4(src1_vreg) | VM4(src2))); + + if (dst_vreg == src1_vreg) + return push_inst32(compiler, VORR | VD4(dst_vreg) | VN4(TMP_FREG2) | VM4(TMP_FREG2)); + return SLJIT_SUCCESS; + } + ins |= (sljit_ins)1 << 6; } - return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg)); + return push_inst32(compiler, ins | VD4(dst_vreg) | VN4(src1_vreg) | VM4(src2)); } #undef FPU_LOAD @@ -4194,7 +4288,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + case SLJIT_MOV_S16: + case SLJIT_MOV_S32: + return SLJIT_ERR_UNSUPPORTED; + case SLJIT_MOV_U8: ins = LDREXB; break; @@ -4206,6 +4308,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct 
sljit_compiler break; } + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg)); } @@ -4222,7 +4327,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + case SLJIT_MOV_S16: + case SLJIT_MOV_S32: + return SLJIT_ERR_UNSUPPORTED; + case SLJIT_MOV_U8: ins = STREXB | RM4(TMP_REG1); break; @@ -4234,6 +4347,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler break; } + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg))); if (op & SLJIT_SET_ATOMIC_STORED) return push_inst32(compiler, CMPI_W | RN4(TMP_REG1)); diff --git a/src/sljit/sljitNativeLOONGARCH_64.c b/deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c similarity index 93% rename from src/sljit/sljitNativeLOONGARCH_64.c rename to deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c index 2e1d742..73868ca 100644 --- a/src/sljit/sljitNativeLOONGARCH_64.c +++ b/deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c @@ -250,6 +250,9 @@ lower parts in the instruction word, denoted by the “L” and “H” suffixes #define AMCAS_W OPC_3R(0x70B2) #define AMCAS_D OPC_3R(0x70B3) +/* Memory barrier instructions */ +#define DBAR OPC_3R(0x70e4) + /* Other instructions */ #define BREAK OPC_3R(0x54) #define DBGCALL OPC_3R(0x55) @@ -348,6 +351,7 @@ lower parts in the instruction word, denoted by the “L” and “H” suffixes #define VREPLGR2VR OPC_2R(0x1ca7c0) #define VREPLVE OPC_3R(0xe244) #define VREPLVEI OPC_2R(0x1cbde0) +#define VSHUF_B OPC_4R(0xd5) #define XVPERMI OPC_2RI8(0x1dfa) #define I12_MAX (0x7ff) @@ -386,6 +390,8 @@ static sljit_u32 hwcap_feature_list = 0; #define GET_CFG2 0 #define GET_HWCAP 1 +#define LOONGARCH_SUPPORT_AMCAS 
(LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2)) + static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type) { if (cfg2_feature_list == 0) @@ -405,14 +411,15 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; - sljit_ins *inst; - - inst = (sljit_ins *)jump->addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; + sljit_uw orig_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + jump->addr = jump_addr; if (jump->flags & SLJIT_REWRITABLE_JUMP) goto exit; @@ -420,20 +427,23 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = jump->u.target; else { SLJIT_ASSERT(jump->u.label != NULL); - target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (jump->u.label->size > orig_addr) + jump_addr = (sljit_uw)(code + orig_addr); } - diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (jump->flags & IS_COND) { diff += SSIZE_OF(ins); if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) { - inst--; - inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000; + code_ptr--; + code_ptr[0] = (code_ptr[0] & 0xfc0003ff) ^ 0x4000000; jump->flags |= PATCH_B; - jump->addr = (sljit_uw)inst; - return inst; + jump->addr = (sljit_uw)code_ptr; + return code_ptr; } diff -= SSIZE_OF(ins); @@ -441,60 +451,65 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (diff >= JUMP_MIN && diff <= JUMP_MAX) { if (jump->flags & IS_COND) { - 
inst[-1] |= (sljit_ins)IMM_I16(2); + code_ptr[-1] |= (sljit_ins)IMM_I16(2); } jump->flags |= PATCH_J; - return inst; + return code_ptr; } if (diff >= S32_MIN && diff <= S32_MAX) { if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(3); + code_ptr[-1] |= (sljit_ins)IMM_I16(3); jump->flags |= PATCH_REL32; - inst[1] = inst[0]; - return inst + 1; + code_ptr[1] = code_ptr[0]; + return code_ptr + 1; } if (target_addr <= (sljit_uw)S32_MAX) { if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(3); + code_ptr[-1] |= (sljit_ins)IMM_I16(3); jump->flags |= PATCH_ABS32; - inst[1] = inst[0]; - return inst + 1; + code_ptr[1] = code_ptr[0]; + return code_ptr + 1; } if (target_addr <= S52_MAX) { if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(4); + code_ptr[-1] |= (sljit_ins)IMM_I16(4); jump->flags |= PATCH_ABS52; - inst[2] = inst[0]; - return inst + 2; + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; } exit: if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(5); - inst[3] = inst[0]; - return inst + 3; + code_ptr[-1] |= (sljit_ins)IMM_I16(5); + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; } static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_uw addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; sljit_sw diff; SLJIT_UNUSED_ARG(executable_offset); SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT)); if (jump->flags & JUMP_ADDR) addr = jump->u.target; - else + else { addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); - diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (jump->u.label->size > jump->addr) + jump_addr = (sljit_uw)(code + jump->addr); + } + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (diff >= S32_MIN && diff <= S32_MAX) { SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); @@ -617,6 +632,10 @@ 
static void reduce_code_size(struct sljit_compiler *compiler) } else { /* Unit size: instruction. */ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins))) total_size = 0; @@ -635,6 +654,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & JUMP_ADDR)) { /* Real size minus 1. Unit size: instruction. */ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) total_size = 1; @@ -710,8 +733,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (next_min_addr == next_jump_addr) { if (!(jump->flags & JUMP_MOV_ADDR)) { word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); - jump->addr = (sljit_uw)code_ptr; - code_ptr = detect_jump_type(jump, code, executable_offset); + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); } else { word_count += jump->flags >> JUMP_SIZE_SHIFT; @@ -804,9 +826,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_SIMD: return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP)); - case SLJIT_HAS_ATOMIC: - return (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2)); - case SLJIT_HAS_CLZ: case SLJIT_HAS_CTZ: case SLJIT_HAS_REV: @@ -814,6 +833,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_PREFETCH: case SLJIT_HAS_COPY_F32: case 
SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; default: @@ -889,16 +910,22 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 i, tmp, offset; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); @@ -973,13 +1000,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #undef STACK_MAX_DISTANCE SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 
fsaveds; + CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); @@ -1884,6 +1918,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)); case SLJIT_DIV_SW: return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)); + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, DBAR); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; @@ -2644,10 +2680,8 @@ static sljit_ins get_jump_instruction(sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_ATOMIC_NOT_STORED: return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO); case SLJIT_NOT_EQUAL: - case SLJIT_ATOMIC_STORED: return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO); case SLJIT_LESS: case SLJIT_GREATER: @@ -2655,6 +2689,7 @@ static sljit_ins get_jump_instruction(sljit_s32 type) case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: case SLJIT_CARRY: + case SLJIT_ATOMIC_STORED: return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO); case SLJIT_GREATER_EQUAL: case SLJIT_LESS_EQUAL: @@ -2662,6 +2697,7 @@ static sljit_ins get_jump_instruction(sljit_s32 type) case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_CARRY: + case SLJIT_ATOMIC_NOT_STORED: return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO); 
case SLJIT_F_EQUAL: case SLJIT_ORDERED_EQUAL: @@ -2933,7 +2969,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; case SLJIT_ATOMIC_STORED: case SLJIT_ATOMIC_NOT_STORED: - FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1))); + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1))); src_r = dst_r; invert ^= 0x1; break; @@ -3162,14 +3198,14 @@ static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, slj } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_ins ins = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3184,9 +3220,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co if (!(srcdst & SLJIT_MEM)) { if (type & SLJIT_SIMD_STORE) - ins = FRD(srcdst) | FRJ(freg) | FRK(freg); + ins = FRD(srcdst) | FRJ(vreg) | FRK(vreg); else - ins = FRD(freg) | FRJ(srcdst) | FRK(srcdst); + ins = FRD(vreg) | FRJ(srcdst) | FRK(srcdst); if (reg_size == 5) ins |= VOR_V | (sljit_ins)1 << 26; @@ -3202,15 +3238,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co ins = (type & SLJIT_SIMD_STORE) ? 
XVST : XVLD; if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX)) - return push_inst(compiler, ins | FRD(freg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw)); + return push_inst(compiler, ins | FRD(vreg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw)); else { FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); - return push_inst(compiler, ins | FRD(freg) | RJ(srcdst) | IMM_I12(0)); + return push_inst(compiler, ins | FRD(vreg) | RJ(srcdst) | IMM_I12(0)); } } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3218,7 +3254,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil sljit_ins ins = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -3237,7 +3273,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (reg_size == 5) ins = (sljit_ins)1 << 25; - return push_inst(compiler, VLDREPL | ins | FRD(freg) | RJ(src) | (sljit_ins)1 << (23 - elem_size)); + return push_inst(compiler, VLDREPL | ins | FRD(vreg) | RJ(src) | (sljit_ins)1 << (23 - elem_size)); } if (reg_size == 5) @@ -3245,13 +3281,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (type & SLJIT_SIMD_FLOAT) { if (src == SLJIT_IMM) - return push_inst(compiler, VREPLGR2VR | ins | FRD(freg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10); + return push_inst(compiler, VREPLGR2VR | ins | FRD(vreg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10); - FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(freg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15)); + FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(vreg) | FRJ(src) | RK(TMP_ZERO) | 
(sljit_ins)elem_size << 15)); if (reg_size == 5) { ins = (sljit_ins)(0x44 << 10); - return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg)); + return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg)); } return SLJIT_SUCCESS; @@ -3264,11 +3300,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil src = TMP_REG2; } - return push_inst(compiler, ins | FRD(freg) | RJ(src)); + return push_inst(compiler, ins | FRD(vreg) | RJ(src)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3276,7 +3312,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile sljit_ins ins = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3298,13 +3334,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (type & SLJIT_SIMD_LANE_ZERO) { ins = (reg_size == 5) ? 
((sljit_ins)1 << 26) : 0; - if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { - FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) { + FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg))); srcdst = TMP_FREG1; srcdstw = 0; } - FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(freg) | FRJ(freg) | FRK(freg))); + FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(vreg) | FRJ(vreg) | FRK(vreg))); } if (srcdst & SLJIT_MEM) { @@ -3315,7 +3351,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (type & SLJIT_SIMD_STORE) { ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size)); - return push_inst(compiler, VSTELM | ins | FRD(freg) | RJ(srcdst)); + return push_inst(compiler, VSTELM | ins | FRD(vreg) | RJ(srcdst)); } else { emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0); srcdst = TMP_REG1; @@ -3323,20 +3359,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (reg_size == 5) { if (elem_size < 2) { - FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg))); if (lane_index >= (2 << (3 - elem_size))) { - FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1))); FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size))))); - return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2)); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2)); } 
else { - FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index))); - return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18)); + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18)); } } else ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26; } - return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)); + return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index)); } } @@ -3344,11 +3380,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; if (type & SLJIT_SIMD_STORE) { - FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(freg) | IMM_V(lane_index))); + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(vreg) | IMM_V(lane_index))); return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0)); } else { FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0))); - return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(TMP_REG1) | IMM_V(lane_index)); + return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(TMP_REG1) | IMM_V(lane_index)); } } @@ -3373,8 +3409,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile else ins |= VPICKVE2GR_U; - FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); - FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg))); + FAIL_IF(push_inst(compiler, XVPERMI | 
(sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1))); return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size)))); } } else { @@ -3383,33 +3419,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } } - return push_inst(compiler, ins | RD(srcdst) | FRJ(freg) | IMM_V(lane_index)); + return push_inst(compiler, ins | RD(srcdst) | FRJ(vreg) | IMM_V(lane_index)); } else { ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; if (reg_size == 5) { if (elem_size < 2) { - FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg))); if (lane_index >= (2 << (3 - elem_size))) { - FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1))); FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size))))); - return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2)); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2)); } else { - FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index))); - return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18)); + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18)); } } else ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26; } - return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)); + return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | 
IMM_V(lane_index)); } return SLJIT_ERR_UNSUPPORTED; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3417,7 +3453,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c sljit_ins ins = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); if (reg_size != 5 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -3431,18 +3467,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; if (reg_size == 5) { - FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size))))); + FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size))))); ins = (src_lane_index < (2 << (3 - elem_size))) ? 
(sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10); - return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg)); + return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg)); } - return push_inst(compiler, VREPLVEI | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index)); + return push_inst(compiler, VREPLVEI | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3451,7 +3487,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler sljit_ins ins = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -3471,12 +3507,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD; if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX)) - FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(srcw))); + FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(srcw))); else { FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); - FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(0))); } - src = freg; + src = vreg; } if (type & SLJIT_SIMD_FLOAT) { @@ -3489,7 +3525,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16))); } - return push_inst(compiler, VFCVTL_D_S | ins | FRD(freg) | FRJ(src)); + return push_inst(compiler, VFCVTL_D_S | ins | FRD(vreg) | FRJ(src)); } ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? 
VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18); @@ -3501,15 +3537,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler if (reg_size == 5) FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16))); - FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(freg) | FRJ(src))); - src = freg; + FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(vreg) | FRJ(src))); + src = vreg; } while (++elem_size < elem2_size); return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3518,7 +3554,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c sljit_s32 dst_r; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); @@ -3539,7 +3575,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c if (reg_size == 5) ins = (sljit_ins)1 << 26; - FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(freg))); + FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(vreg))); FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1))); @@ -3556,14 +3592,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); sljit_ins ins = 0; 
CHECK_ERROR(); - CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); + ADJUST_LOCAL_OFFSET(src2, src2w); if (reg_size != 5 && reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -3577,6 +3614,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; + if (src2 & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w)); + FAIL_IF(push_inst(compiler, (reg_size == 4 ? VLD : XVLD) | FRD(TMP_FREG1) | RJ(src2) | IMM_I12(0))); + src2 = TMP_FREG1; + } + switch (SLJIT_SIMD_GET_OPCODE(type)) { case SLJIT_SIMD_OP2_AND: ins = VAND_V; @@ -3587,12 +3630,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co case SLJIT_SIMD_OP2_XOR: ins = VXOR_V; break; + case SLJIT_SIMD_OP2_SHUFFLE: + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + return push_inst(compiler, VSHUF_B | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src1_vreg) | FRA(src2)); } if (reg_size == 5) ins |= (sljit_ins)1 << 26; - return push_inst(compiler, ins | FRD(dst_freg) | FRJ(src1_freg) | FRK(src2_freg)); + return push_inst(compiler, ins | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src2)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, @@ -3605,14 +3653,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) { + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + ins = LL_D; + break; + case SLJIT_MOV_S32: + case SLJIT_MOV32: + ins = LL_W; + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | 
RD(dst_reg) | RJ(mem_reg)); + } + switch(GET_OPCODE(op)) { + case SLJIT_MOV_S8: + ins = LD_B; + break; case SLJIT_MOV_U8: ins = LD_BU; break; + case SLJIT_MOV_S16: + ins = LD_H; + break; case SLJIT_MOV_U16: ins = LD_HU; break; case SLJIT_MOV32: + case SLJIT_MOV_S32: ins = LD_W; break; case SLJIT_MOV_U32: @@ -3623,6 +3702,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler break; } + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0)); } @@ -3639,16 +3721,48 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) { + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + ins = SC_D; + break; + case SLJIT_MOV_S32: + case SLJIT_MOV32: + ins = SC_W; + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, ADD_D | RD(OTHER_FLAG) | RJ(src_reg) | RK(TMP_ZERO))); + return push_inst(compiler, ins | RD(OTHER_FLAG) | RJ(mem_reg)); + } + switch (GET_OPCODE(op)) { + case SLJIT_MOV_S8: + ins = AMCAS_B; + break; case SLJIT_MOV_U8: ins = AMCAS_B; unsign = BSTRPICK_D | (7 << 16); break; + case SLJIT_MOV_S16: + ins = AMCAS_H; + break; case SLJIT_MOV_U16: ins = AMCAS_H; unsign = BSTRPICK_D | (15 << 16); break; case SLJIT_MOV32: + case SLJIT_MOV_S32: ins = AMCAS_W; break; case SLJIT_MOV_U32: @@ -3660,9 +3774,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler break; } + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + if (op & SLJIT_SET_ATOMIC_STORED) { - FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO))); - tmp = TMP_REG1; + FAIL_IF(push_inst(compiler, XOR | 
RD(TMP_REG3) | RJ(temp_reg) | RK(TMP_ZERO))); + tmp = TMP_REG3; } FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg))); if (!(op & SLJIT_SET_ATOMIC_STORED)) @@ -3671,8 +3788,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler if (unsign) FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp))); - FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg))); - return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1)); + FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(tmp) | RK(temp_reg))); + return push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | IMM_I12(1)); } static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) diff --git a/src/sljit/sljitNativeMIPS_32.c b/deps/sljit/sljit_src/sljitNativeMIPS_32.c similarity index 100% rename from src/sljit/sljitNativeMIPS_32.c rename to deps/sljit/sljit_src/sljitNativeMIPS_32.c diff --git a/src/sljit/sljitNativeMIPS_64.c b/deps/sljit/sljit_src/sljitNativeMIPS_64.c similarity index 100% rename from src/sljit/sljitNativeMIPS_64.c rename to deps/sljit/sljit_src/sljitNativeMIPS_64.c diff --git a/src/sljit/sljitNativeMIPS_common.c b/deps/sljit/sljit_src/sljitNativeMIPS_common.c similarity index 97% rename from src/sljit/sljitNativeMIPS_common.c rename to deps/sljit/sljit_src/sljitNativeMIPS_common.c index 88eb30b..1b951fe 100644 --- a/src/sljit/sljitNativeMIPS_common.c +++ b/deps/sljit/sljit_src/sljitNativeMIPS_common.c @@ -249,6 +249,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define LDL (HI(26)) #define LDR (HI(27)) #define LDC1 (HI(53)) +#define LL (HI(48)) +#define LLD (HI(52)) #define LUI (HI(15)) #define LW (HI(35)) #define LWL (HI(34)) @@ -288,6 +290,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define ROTR (HI(0) | (1 << 21) | LO(2)) #define ROTRV (HI(0) | (1 << 6) | LO(6)) 
#endif /* SLJIT_MIPS_REV >= 2 */ +#define SC (HI(56)) +#define SCD (HI(60)) #define SD (HI(63)) #define SDL (HI(44)) #define SDR (HI(45)) @@ -308,6 +312,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define SWL (HI(42)) #define SWR (HI(46)) #define SWC1 (HI(57)) +#define SYNC (HI(0) | LO(15)) #define TRUNC_W_S (HI(17) | FMT_S | LO(13)) #if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 #define WSBH (HI(31) | (2 << 6) | LO(32)) @@ -381,11 +386,21 @@ static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) fr -= SLJIT_F64_SECOND(0); - return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) - || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->real_fscratches)) + || (fr > (SLJIT_FS0 - compiler->real_fsaveds) && fr <= SLJIT_FS0) || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); } +static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(vr); + SLJIT_UNUSED_ARG(type); + + /* SIMD is not supported. 
*/ + return 0; +} + #endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_ARGUMENT_CHECKS */ static void get_cpu_features(void) @@ -857,6 +872,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_CLZ: case SLJIT_HAS_CMOV: case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; case SLJIT_HAS_CTZ: @@ -928,17 +945,22 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit #endif SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds); sljit_ins base; sljit_s32 i, tmp, offset; sljit_s32 arg_count, word_arg_count, float_arg_count; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -1138,12 +1160,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 
fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds); + CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -2462,6 +2490,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); return (op >= SLJIT_DIV_UW) ? 
SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); #endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_MEMORY_BARRIER: +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SYNC, UNMOVABLE_INS); +#else /* SLJIT_MIPS_REV < 1 */ + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_MIPS_REV >= 1 */ case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; @@ -3312,6 +3346,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: case SLJIT_CARRY: + case SLJIT_ATOMIC_STORED: BR_Z(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -3320,6 +3355,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_CARRY: + case SLJIT_ATOMIC_NOT_STORED: BR_NZ(OTHER_FLAG); break; case SLJIT_F_NOT_EQUAL: @@ -4209,6 +4245,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil #undef TO_ARGW_HI +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + ins = LLD; + break; +#endif /* SLJIT_CONFIG_MIPS_64 */ + case SLJIT_MOV_S32: + case SLJIT_MOV32: + ins = LL; + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | T(dst_reg) | S(mem_reg), DR(dst_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins; + + /* 
temp_reg == mem_reg is undefined so use another temp register */ + SLJIT_UNUSED_ARG(temp_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + ins = SCD; + break; +#endif /* SLJIT_CONFIG_RISCV_64 */ + case SLJIT_MOV_S32: + case SLJIT_MOV32: + op |= SLJIT_32; + ins = SC; + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src_reg) | TA(0) | DA(OTHER_FLAG), OTHER_FLAG)); + return push_inst(compiler, ins | TA(OTHER_FLAG) | S(mem_reg), OTHER_FLAG); +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; diff --git a/src/sljit/sljitNativePPC_32.c b/deps/sljit/sljit_src/sljitNativePPC_32.c similarity index 100% rename from src/sljit/sljitNativePPC_32.c rename to deps/sljit/sljit_src/sljitNativePPC_32.c diff --git a/src/sljit/sljitNativePPC_64.c b/deps/sljit/sljit_src/sljitNativePPC_64.c similarity index 100% rename from src/sljit/sljitNativePPC_64.c rename to deps/sljit/sljit_src/sljitNativePPC_64.c diff --git a/src/sljit/sljitNativePPC_common.c b/deps/sljit/sljit_src/sljitNativePPC_common.c similarity index 96% rename from src/sljit/sljitNativePPC_common.c rename to deps/sljit/sljit_src/sljitNativePPC_common.c index 1f17d90..3d2268e 100644 --- a/src/sljit/sljitNativePPC_common.c +++ b/deps/sljit/sljit_src/sljitNativePPC_common.c @@ -187,10 +187,12 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define LD (HI(58) | 0) #define LFD (HI(50)) #define LFS (HI(48)) +#define LDARX (HI(31) | LO(84)) #if defined(_ARCH_PWR7) && _ARCH_PWR7 #define LDBRX (HI(31) | 
LO(532)) #endif /* POWER7 */ #define LHBRX (HI(31) | LO(790)) +#define LWARX (HI(31) | LO(20)) #define LWBRX (HI(31) | LO(534)) #define LWZ (HI(32)) #define MFCR (HI(31) | LO(19)) @@ -231,6 +233,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #if defined(_ARCH_PWR7) && _ARCH_PWR7 #define STDBRX (HI(31) | LO(660)) #endif /* POWER7 */ +#define STDCX (HI(31) | LO(214)) #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) #define STFD (HI(54)) @@ -239,12 +242,14 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define STHBRX (HI(31) | LO(918)) #define STW (HI(36)) #define STWBRX (HI(31) | LO(662)) +#define STWCX (HI(31) | LO(150)) #define STWU (HI(37)) #define STWUX (HI(31) | LO(183)) #define SUBF (HI(31) | LO(40)) #define SUBFC (HI(31) | LO(8)) #define SUBFE (HI(31) | LO(136)) #define SUBFIC (HI(8)) +#define SYNC (HI(31) | LO(598)) #define XOR (HI(31) | LO(316)) #define XORI (HI(26)) #define XORIS (HI(27)) @@ -314,7 +319,11 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i { sljit_sw diff; sljit_uw target_addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; + sljit_uw orig_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + jump->addr = jump_addr; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) goto exit; @@ -328,6 +337,9 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i else { SLJIT_ASSERT(jump->u.label != NULL); target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + + if (jump->u.label->size > orig_addr) + jump_addr = (sljit_uw)(code + orig_addr); } #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -335,7 +347,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump 
*jump, sljit_i goto keep_address; #endif - diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (jump->flags & IS_COND) { if (diff <= 0x7fff && diff >= -0x8000) { @@ -547,6 +559,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) } else { /* Unit size: instruction. */ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (jump->flags & IS_COND) { if (diff <= (0x7fff / SSIZE_OF(ins)) && diff >= (-0x8000 / SSIZE_OF(ins))) @@ -592,6 +608,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_uw jump_addr; +#endif SLJIT_NEXT_DEFINE_TYPES; sljit_sw executable_offset; @@ -648,9 +667,11 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (next_min_addr == next_jump_addr) { if (!(jump->flags & JUMP_MOV_ADDR)) { word_count += jump->flags >> JUMP_SIZE_SHIFT; - jump->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump_addr = (sljit_uw)code_ptr; +#endif code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); - SLJIT_ASSERT(((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + SLJIT_ASSERT(((sljit_uw)code_ptr - jump_addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); } else { jump->addr = (sljit_uw)code_ptr; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -748,6 +769,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_CLZ: case SLJIT_HAS_ROT: case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; case SLJIT_HAS_CTZ: @@ -845,9 +868,11 @@ 
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag #define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds); sljit_s32 i, tmp, base, offset; sljit_s32 word_arg_count = 0; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); @@ -856,9 +881,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 0) + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); @@ -962,13 +989,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 fsaveds = 
ENTER_GET_FLOAT_REGS(saveds); + CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 0) + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); @@ -1399,6 +1431,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #else return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); #endif + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, SYNC); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; @@ -2422,6 +2456,7 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type /* fallthrough */ case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: return (12 << 21) | (2 << 16); case SLJIT_CARRY: @@ -2430,6 +2465,7 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type /* fallthrough */ case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: return (4 << 21) | (2 << 16); case SLJIT_LESS: @@ -2686,10 +2722,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: bit = 2; break; case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: bit = 2; invert = 1; break; @@ -3106,6 +3144,78 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | IMM(memw)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + ins = LDARX; + break; +#endif /* SLJIT_CONFIG_RISCV_64 */ + case SLJIT_MOV_U32: + case SLJIT_MOV32: + ins = LWARX; + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | D(dst_reg) | B(mem_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins; + + /* temp_reg == mem_reg is undefined so use another temp register */ + SLJIT_UNUSED_ARG(temp_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + ins = STDCX | 0x1; + break; +#endif /* SLJIT_CONFIG_RISCV_64 */ + case SLJIT_MOV_U32: + case SLJIT_MOV32: + ins = STWCX | 0x1; + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | D(src_reg) | B(mem_reg)); +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; diff --git a/src/sljit/sljitNativeRISCV_32.c b/deps/sljit/sljit_src/sljitNativeRISCV_32.c similarity index 100% rename from src/sljit/sljitNativeRISCV_32.c rename to deps/sljit/sljit_src/sljitNativeRISCV_32.c 
diff --git a/src/sljit/sljitNativeRISCV_64.c b/deps/sljit/sljit_src/sljitNativeRISCV_64.c similarity index 100% rename from src/sljit/sljitNativeRISCV_64.c rename to deps/sljit/sljit_src/sljitNativeRISCV_64.c diff --git a/src/sljit/sljitNativeRISCV_common.c b/deps/sljit/sljit_src/sljitNativeRISCV_common.c similarity index 78% rename from src/sljit/sljitNativeRISCV_common.c rename to deps/sljit/sljit_src/sljitNativeRISCV_common.c index d86100a..d3ba46d 100644 --- a/src/sljit/sljitNativeRISCV_common.c +++ b/deps/sljit/sljit_src/sljitNativeRISCV_common.c @@ -50,6 +50,9 @@ typedef sljit_u32 sljit_ins; #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) +#define TMP_VREG1 (SLJIT_NUMBER_OF_VECTOR_REGISTERS + 1) +#define TMP_VREG2 (SLJIT_NUMBER_OF_VECTOR_REGISTERS + 2) + static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { 0, 10, 11, 12, 13, 14, 15, 16, 17, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 2, 6, 1, 7, 5, 28 }; @@ -58,6 +61,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { 0, 10, 11, 12, 13, 14, 15, 16, 17, 2, 3, 4, 5, 6, 7, 28, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 0, 1, }; +static const sljit_u8 vreg_map[SLJIT_NUMBER_OF_VECTOR_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 +}; + /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ @@ -68,6 +75,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FRD(rd) ((sljit_ins)freg_map[rd] << 7) #define FRS1(rs1) ((sljit_ins)freg_map[rs1] << 15) #define FRS2(rs2) ((sljit_ins)freg_map[rs2] << 20) +#define VRD(rd) ((sljit_ins)vreg_map[rd] << 7) +#define VRS1(rs1) ((sljit_ins)vreg_map[rs1] << 15) +#define VRS2(rs2) ((sljit_ins)vreg_map[rs2] << 20) 
#define IMM_I(imm) ((sljit_ins)(imm) << 20) #define IMM_S(imm) ((((sljit_ins)(imm) & 0xfe0) << 20) | (((sljit_ins)(imm) & 0x1f) << 7)) @@ -77,6 +87,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define F12(f) ((sljit_ins)(f) << 20) #define F7(f) ((sljit_ins)(f) << 25) +/* Vector instruction types. */ +#define OPFVF (F3(0x5) | OPC(0x57)) +#define OPFVV (F3(0x1) | OPC(0x57)) +#define OPIVI (F3(0x3) | OPC(0x57)) +#define OPIVV (F3(0x0) | OPC(0x57)) +#define OPIVX (F3(0x4) | OPC(0x57)) +#define OPMVV (F3(0x2) | OPC(0x57)) +#define OPMVX (F3(0x6) | OPC(0x57)) + #define ADD (F7(0x0) | F3(0x0) | OPC(0x33)) #define ADDI (F3(0x0) | OPC(0x13)) #define AND (F7(0x0) | F3(0x7) | OPC(0x33)) @@ -88,11 +107,16 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define BGE (F3(0x5) | OPC(0x63)) #define BLTU (F3(0x6) | OPC(0x63)) #define BGEU (F3(0x7) | OPC(0x63)) +#if defined __riscv_zbb +#define CLZ (F7(0x30) | F3(0x1) | OPC(0x13)) +#define CTZ (F7(0x30) | F12(0x1) | F3(0x1) | OPC(0x13)) +#endif /* __riscv_zbb */ #define DIV (F7(0x1) | F3(0x4) | OPC(0x33)) #define DIVU (F7(0x1) | F3(0x5) | OPC(0x33)) #define EBREAK (F12(0x1) | F3(0x0) | OPC(0x73)) #define FADD_S (F7(0x0) | F3(0x7) | OPC(0x53)) #define FDIV_S (F7(0xc) | F3(0x7) | OPC(0x53)) +#define FENCE (F3(0x0) | OPC(0xf)) #define FEQ_S (F7(0x50) | F3(0x2) | OPC(0x53)) #define FLD (F3(0x3) | OPC(0x7)) #define FLE_S (F7(0x50) | F3(0x0) | OPC(0x53)) @@ -116,6 +140,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define LD (F3(0x3) | OPC(0x3)) #define LUI (OPC(0x37)) #define LW (F3(0x2) | OPC(0x3)) +#define LR (F7(0x8) | OPC(0x2f)) #define MUL (F7(0x1) | F3(0x0) | OPC(0x33)) #define MULH (F7(0x1) | F3(0x1) | OPC(0x33)) #define MULHU (F7(0x1) | F3(0x3) | OPC(0x33)) @@ -123,21 +148,73 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define ORI (F3(0x6) | OPC(0x13)) #define REM (F7(0x1) | F3(0x6) | OPC(0x33)) #define REMU 
(F7(0x1) | F3(0x7) | OPC(0x33)) +#if defined __riscv_zbb +#if defined SLJIT_CONFIG_RISCV_32 +#define REV8 (F12(0x698) | F3(0x5) | OPC(0x13)) +#elif defined SLJIT_CONFIG_RISCV_64 +#define REV8 (F12(0x6b8) | F3(0x5) | OPC(0x13)) +#endif /* SLJIT_CONFIG_RISCV_32 */ +#define ROL (F7(0x30) | F3(0x1) | OPC(0x33)) +#define ROR (F7(0x30) | F3(0x5) | OPC(0x33)) +#define RORI (F7(0x30) | F3(0x5) | OPC(0x13)) +#endif /* __riscv_zbb */ +#define SC (F7(0xc) | OPC(0x2f)) #define SD (F3(0x3) | OPC(0x23)) +#if defined __riscv_zbb +#define SEXTB (F7(0x30) | F12(0x4) | F3(0x1) | OPC(0x13)) +#define SEXTH (F7(0x30) | F12(0x5) | F3(0x1) | OPC(0x13)) +#endif /* __riscv_zbb */ +#if defined __riscv_zba +#define SH1ADD (F7(0x10) | F3(0x2) | OPC(0x33)) +#define SH2ADD (F7(0x10) | F3(0x4) | OPC(0x33)) +#define SH3ADD (F7(0x10) | F3(0x6) | OPC(0x33)) +#endif /* __riscv_zba */ #define SLL (F7(0x0) | F3(0x1) | OPC(0x33)) -#define SLLI (IMM_I(0x0) | F3(0x1) | OPC(0x13)) +#define SLLI (F3(0x1) | OPC(0x13)) #define SLT (F7(0x0) | F3(0x2) | OPC(0x33)) #define SLTI (F3(0x2) | OPC(0x13)) #define SLTU (F7(0x0) | F3(0x3) | OPC(0x33)) #define SLTUI (F3(0x3) | OPC(0x13)) #define SRL (F7(0x0) | F3(0x5) | OPC(0x33)) -#define SRLI (IMM_I(0x0) | F3(0x5) | OPC(0x13)) +#define SRLI (F3(0x5) | OPC(0x13)) #define SRA (F7(0x20) | F3(0x5) | OPC(0x33)) -#define SRAI (IMM_I(0x400) | F3(0x5) | OPC(0x13)) +#define SRAI (F7(0x20) | F3(0x5) | OPC(0x13)) #define SUB (F7(0x20) | F3(0x0) | OPC(0x33)) #define SW (F3(0x2) | OPC(0x23)) +#define VAND_VV (F7(0x13) | OPIVV) +#define VFMV_FS (F7(0x21) | OPFVV) +#define VFMV_SF (F7(0x21) | OPFVF) +#define VFMV_VF (F7(0x2f) | OPFVF) +#define VFWCVT_FFV (F7(0x25) | (0xc << 15) | OPFVV) +#define VL (F7(0x1) | OPC(0x7)) +#define VMSLE_VI (F7(0x3b) | OPIVI) +#define VMV_SX (F7(0x21) | OPMVX) +#define VMV_VI (F7(0x2f) | OPIVI) +#define VMV_VV (F7(0x2f) | OPIVV) +#define VMV_VX (F7(0x2f) | OPIVX) +#define VMV_XS (F7(0x21) | OPMVV) +#define VOR_VV (F7(0x15) | OPIVV) +#define VSETIVLI 
(F7(0x60) | F3(0x7) | OPC(0x57)) +#define VS (F7(0x1) | OPC(0x27)) +#define VSLIDEDOWN_VX (F7(0x1f) | OPIVX) +#define VSLIDEDOWN_VI (F7(0x1f) | OPIVI) +#define VSLIDEUP_VX (F7(0x1d) | OPIVX) +#define VSLIDEUP_VI (F7(0x1d) | OPIVI) +#define VRGATHER_VI (F7(0x19) | OPIVI) +#define VRGATHER_VV (F7(0x19) | OPIVV) +#define VXOR_VV (F7(0x17) | OPIVV) +#define VZEXT_VF2 (F7(0x25) | (0x6 << 15) | OPMVV) +#define VZEXT_VF4 (F7(0x25) | (0x4 << 15) | OPMVV) +#define VZEXT_VF8 (F7(0x25) | (0x2 << 15) | OPMVV) #define XOR (F7(0x0) | F3(0x4) | OPC(0x33)) #define XORI (F3(0x4) | OPC(0x13)) +#if defined __riscv_zbb +#if defined SLJIT_CONFIG_RISCV_32 +#define ZEXTH (F7(0x4) | F3(0x4) | OPC(0x33)) +#elif defined SLJIT_CONFIG_RISCV_64 +#define ZEXTH (F7(0x4) | F3(0x4) | OPC(0x3B)) +#endif /* SLJIT_CONFIG_RISCV_32 */ +#endif /* __riscv_zbb */ #define SIMM_MAX (0x7ff) #define SIMM_MIN (-0x800) @@ -151,7 +228,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define S32_MIN (-0x80000000l) #define S44_MAX (0x7fffffff7ffl) #define S52_MAX (0x7ffffffffffffl) -#endif +#endif /* SLJIT_CONFIG_RISCV_64 */ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { @@ -167,14 +244,15 @@ static sljit_s32 push_imm_s_inst(struct sljit_compiler *compiler, sljit_ins ins, return push_inst(compiler, ins | IMM_S(imm)); } -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; - sljit_ins *inst; - - inst = (sljit_ins *)jump->addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; + sljit_uw orig_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + jump->addr = jump_addr; if (jump->flags & SLJIT_REWRITABLE_JUMP) goto exit; @@ -182,20 +260,23 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i 
target_addr = jump->u.target; else { SLJIT_ASSERT(jump->u.label != NULL); - target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (jump->u.label->size > orig_addr) + jump_addr = (sljit_uw)(code + orig_addr); } - diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (jump->flags & IS_COND) { diff += SSIZE_OF(ins); if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) { - inst--; - inst[0] = (inst[0] & 0x1fff07f) ^ 0x1000; + code_ptr--; + code_ptr[0] = (code_ptr[0] & 0x1fff07f) ^ 0x1000; jump->flags |= PATCH_B; - jump->addr = (sljit_uw)inst; - return inst; + jump->addr = (sljit_uw)code_ptr; + return code_ptr; } diff -= SSIZE_OF(ins); @@ -204,62 +285,62 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (diff >= JUMP_MIN && diff <= JUMP_MAX) { if (jump->flags & IS_COND) { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; -#else - inst[-1] -= (sljit_ins)(5 * sizeof(sljit_ins)) << 7; -#endif + code_ptr[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; +#else /* !SLJIT_CONFIG_RISCV_32 */ + code_ptr[-1] -= (sljit_ins)(5 * sizeof(sljit_ins)) << 7; +#endif /* SLJIT_CONFIG_RISCV_32 */ } jump->flags |= PATCH_J; - return inst; + return code_ptr; } #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) if (diff >= S32_MIN && diff <= S32_MAX) { if (jump->flags & IS_COND) - inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7; + code_ptr[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7; jump->flags |= PATCH_REL32; - inst[1] = inst[0]; - return inst + 1; + code_ptr[1] = code_ptr[0]; + return code_ptr + 1; } if (target_addr <= (sljit_uw)S32_MAX) { if (jump->flags & IS_COND) - inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7; + code_ptr[-1] -= 
(sljit_ins)(4 * sizeof(sljit_ins)) << 7; jump->flags |= PATCH_ABS32; - inst[1] = inst[0]; - return inst + 1; + code_ptr[1] = code_ptr[0]; + return code_ptr + 1; } if (target_addr <= S44_MAX) { if (jump->flags & IS_COND) - inst[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7; + code_ptr[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7; jump->flags |= PATCH_ABS44; - inst[3] = inst[0]; - return inst + 3; + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; } if (target_addr <= S52_MAX) { if (jump->flags & IS_COND) - inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; + code_ptr[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; jump->flags |= PATCH_ABS52; - inst[4] = inst[0]; - return inst + 4; + code_ptr[4] = code_ptr[0]; + return code_ptr + 4; } -#endif +#endif /* SLJIT_CONFIG_RISCV_64 */ exit: #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) - inst[1] = inst[0]; - return inst + 1; -#else - inst[5] = inst[0]; - return inst + 5; -#endif + code_ptr[1] = code_ptr[0]; + return code_ptr + 1; +#else /* !SLJIT_CONFIG_RISCV_32 */ + code_ptr[5] = code_ptr[0]; + return code_ptr + 5; +#endif /* SLJIT_CONFIG_RISCV_32 */ } #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) @@ -267,16 +348,21 @@ exit: static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_uw addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; sljit_sw diff; SLJIT_UNUSED_ARG(executable_offset); SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT)); if (jump->flags & JUMP_ADDR) addr = jump->u.target; - else + else { addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); - diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (jump->u.label->size > jump->addr) + jump_addr = (sljit_uw)(code + jump->addr); + } + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (diff >= S32_MIN && diff <= S32_MAX) 
{ SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); @@ -316,7 +402,7 @@ static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw exec sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1; #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) sljit_sw high; -#endif +#endif /* SLJIT_CONFIG_RISCV_64 */ SLJIT_UNUSED_ARG(executable_offset); #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) @@ -337,7 +423,7 @@ static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw exec ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); return; } -#endif +#endif /* SLJIT_CONFIG_RISCV_64 */ if ((addr & 0x800) != 0) addr += 0x1000; @@ -453,6 +539,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) } else { /* Unit size: instruction. */ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH_MIN / SSIZE_OF(ins))) total_size = 0; @@ -474,6 +564,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & JUMP_ADDR)) { /* Real size minus 1. Unit size: instruction. 
*/ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) total_size = 1; @@ -552,8 +646,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (next_min_addr == next_jump_addr) { if (!(jump->flags & JUMP_MOV_ADDR)) { word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); - jump->addr = (sljit_uw)code_ptr; - code_ptr = detect_jump_type(jump, code, executable_offset); + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); } else { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) @@ -642,7 +735,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) return (SLJIT_IS_FPU_AVAILABLE) != 0; #elif defined(__riscv_float_abi_soft) return 0; -#else +#else /* !SLJIT_IS_FPU_AVAILABLE && !__riscv_float_abi_soft */ return 1; #endif /* SLJIT_IS_FPU_AVAILABLE */ case SLJIT_HAS_ZERO_REGISTER: @@ -650,7 +743,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_HAS_COPY_F64: #endif /* !SLJIT_CONFIG_RISCV_64 */ + case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: +#ifdef __riscv_vector + case SLJIT_HAS_SIMD: +#endif /* __riscv_vector */ + return 1; +#ifdef __riscv_zbb + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CTZ: + case SLJIT_HAS_REV: + case SLJIT_HAS_ROT: return 1; +#endif /* __riscv_zbb */ default: return 0; } @@ -708,32 +813,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) #define STACK_STORE SW #define STACK_LOAD LW -#else +#else /* 
!SLJIT_CONFIG_RISCV_32 */ #define STACK_STORE SD #define STACK_LOAD LD -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) #include "sljitNativeRISCV_32.c" -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ #include "sljitNativeRISCV_64.c" -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ #define STACK_MAX_DISTANCE (-SIMM_MIN) static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds); sljit_s32 i, tmp, offset; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { @@ -741,9 +850,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi local_size += SSIZE_OF(sw); local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); } -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ local_size += 
GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; compiler->local_size = local_size; @@ -778,7 +887,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi /* This alignment is valid because offset is not used after storing FPU regs. */ if ((offset & SSIZE_OF(sw)) != 0) offset -= SSIZE_OF(sw); -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ tmp = SLJIT_FS0 - fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { @@ -821,13 +930,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #undef STACK_MAX_DISTANCE SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches); + sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds); + CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { @@ -835,9 +949,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp local_size += SSIZE_OF(sw); local_size += 
GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); } -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; return SLJIT_SUCCESS; @@ -883,7 +997,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit /* This alignment is valid because offset is not used after storing FPU regs. */ if ((offset & SSIZE_OF(sw)) != 0) offset -= SSIZE_OF(sw); -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ tmp = SLJIT_FS0 - compiler->fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { @@ -939,9 +1053,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *c #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) #define ARCH_32_64(a, b) a -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ #define ARCH_32_64(a, b) b -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ static const sljit_ins data_transfer_insts[16 + 4] = { /* u w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */), @@ -1034,6 +1148,9 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl sljit_s32 base = arg & REG_MASK; sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1; sljit_sw offset, argw_hi; +#if defined __riscv_zba + sljit_ins ins = ADD; +#endif /* __riscv_zba */ SLJIT_ASSERT(arg & SLJIT_MEM); if (!(next_arg & SLJIT_MEM)) { @@ -1044,6 +1161,20 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; +#if defined __riscv_zba + switch (argw) { + case 1: + ins = SH1ADD; + break; + case 2: + ins = SH2ADD; + break; + case 3: + ins = SH3ADD; + break; + } + FAIL_IF(push_inst(compiler, ins | RD(tmp_r) | RS1(OFFS_REG(arg)) | RS2(base))); +#else /* !__riscv_zba */ /* Using the cache. 
*/ if (argw == compiler->cache_argw) { if (arg == compiler->cache_arg) @@ -1075,6 +1206,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl } else FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3))); +#endif /* __riscv_zba */ + return push_mem_inst(compiler, flags, reg, tmp_r, 0); } @@ -1161,7 +1294,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji #define WORD_32 0x08 #define IMM_EXTEND(v) (IMM_I((op & SLJIT_32) ? (v) : (32 + (v)))) #endif /* SLJIT_CONFIG_RISCV_32 */ - +#ifndef __riscv_zbb static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) { sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ); @@ -1264,6 +1397,7 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SRLI : SRAI) | WORD | RD(dst) | RS1(dst) | IMM_I(word_size - 16))); return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)); } +#endif /* !__riscv_zbb */ #define EMIT_LOGICAL(op_imm, op_reg) \ if (flags & SRC2_IMM) { \ @@ -1309,6 +1443,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_MOV_S8: +#if defined __riscv_zbb + return push_inst(compiler, SEXTB | RD(dst) | RS1(src2)); +#else /* !__riscv_zbb */ SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24))); @@ -1316,8 +1453,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; +#endif /* __riscv_zbb */ case SLJIT_MOV_U16: +#if defined __riscv_zbb + return push_inst(compiler, ZEXTH | RD(dst) | RS1(src2)); +#else /* !__riscv_zbb */ SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if 
((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); @@ -1325,8 +1466,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; +#endif /* __riscv_zbb */ case SLJIT_MOV_S16: +#if defined __riscv_zbb + return push_inst(compiler, SEXTH | RD(dst) | RS1(src2)); +#else /* !__riscv_zbb */ SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); @@ -1334,6 +1479,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; +#endif /* !__riscv_zbb */ #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) case SLJIT_MOV_U32: @@ -1354,24 +1500,59 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #endif /* SLJIT_CONFIG_RISCV_64 */ case SLJIT_CLZ: +#if defined __riscv_zbb + return push_inst(compiler, CLZ | WORD | RD(dst) | RS1(src2)); +#endif /* __riscv_zbb */ case SLJIT_CTZ: +#if defined __riscv_zbb + return push_inst(compiler, CTZ | WORD | RD(dst) | RS1(src2)); +#else /* !__riscv_zbb */ SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); return emit_clz_ctz(compiler, op, dst, src2); +#endif /* __riscv_zbb */ case SLJIT_REV: +#if defined __riscv_zbb + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, REV8 | RD(dst) | RS1(src2))); +#if defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64 + if (op & SLJIT_32) + return push_inst(compiler, SRAI | RD(dst) | RS1(dst) | IMM_I(32)); + return SLJIT_SUCCESS; +#else /* !SLJIT_CONFIG_RISCV_64 */ + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_RISCV_64 */ +#endif /* __riscv_zbb */ case SLJIT_REV_S32: -#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#if 
((defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || defined __riscv_zbb) case SLJIT_REV_U32: -#endif /* SLJIT_CONFIG_RISCV_32 */ +#endif /* SLJIT_CONFIG_RISCV_32 || __riscv_zbb */ SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); +#if defined __riscv_zbb + FAIL_IF(push_inst(compiler, REV8 | RD(dst) | RS1(src2))); +#if defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64 + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U32 ? SRLI : SRAI )| RD(dst) | RS1(dst) | IMM_I(32)); +#else /* !SLJIT_CONFIG_RISCV_64 */ + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_RISCV_64 */ +#else /* !__riscv_zbb */ return emit_rev(compiler, op, dst, src2); - +#endif /* __riscv_zbb */ case SLJIT_REV_U16: case SLJIT_REV_S16: SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); +#if defined __riscv_zbb + FAIL_IF(push_inst(compiler, REV8 | RD(dst) | RS1(src2))); +#if defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64 + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SRLI : SRAI )| RD(dst) | RS1(dst) | IMM_I(48)); +#else /* !SLJIT_CONFIG_RISCV_64 */ + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? 
SRLI : SRAI) | RD(dst) | RS1(dst) | IMM_I(16)); +#endif /* SLJIT_CONFIG_RISCV_64 */ +#else /* !__riscv_zbb */ return emit_rev16(compiler, op, dst, src2); +#endif /* __riscv_zbb */ -#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +#if ((defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) && !defined __riscv_zbb) case SLJIT_REV_U32: SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); FAIL_IF(emit_rev(compiler, op, dst, src2)); @@ -1379,8 +1560,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(32))); return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); -#endif /* SLJIT_CONFIG_RISCV_32 */ - +#endif /* SLJIT_CONFIG_RISCV_64 && !__riscv_zbb */ case SLJIT_ADD: /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; @@ -1668,7 +1848,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ROTR: if (flags & SRC2_IMM) { SLJIT_ASSERT(src2 != 0); - +#if defined __riscv_zbb + if (GET_OPCODE(op) == SLJIT_ROTL) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + src2 = ((op & SLJIT_32) ? 32 : 64) - src2; +#else /* !SLJIT_CONFIG_RISCV_64 */ + src2 = 32 - src2; +#endif /* SLJIT_CONFIG_RISCV_64 */ + } + return push_inst(compiler, RORI | WORD | RD(dst) | RS1(src1) | IMM_I(src2)); +#else /* !__riscv_zbb */ op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SLLI : SRLI; FAIL_IF(push_inst(compiler, op_imm | WORD | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); @@ -1680,8 +1869,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? 
SRLI : SLLI; FAIL_IF(push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)); +#endif /* !__riscv_zbb */ } +#if defined __riscv_zbb + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_ROTL ? ROL : ROR) | WORD | RD(dst) | RS1(src1) | RS2(src2)); +#else /* !__riscv_zbb */ if (src2 == TMP_ZERO) { if (dst != src1) return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(0)); @@ -1694,7 +1887,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SRL : SLL; FAIL_IF(push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(EQUAL_FLAG))); return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)); - +#endif /* !riscv_zbb */ default: SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; @@ -1881,6 +2074,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); case SLJIT_DIV_SW: return push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, FENCE | 0x0ff00000); case SLJIT_ENDBR: case SLJIT_SKIP_FRAMES_BEFORE_RETURN: return SLJIT_SUCCESS; @@ -1903,7 +2098,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) if (op & SLJIT_32) flags = INT_DATA | SIGNED_DATA; -#endif +#endif /* SLJIT_CONFIG_RISCV_64 */ switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1911,7 +2106,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV_U32: case SLJIT_MOV_S32: case SLJIT_MOV32: -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw); @@ -1923,7 +2118,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op1(struct sljit_compiler *compile /* Logical operators have no W variant, so sign extended input is necessary for them. */ case SLJIT_MOV32: return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); -#endif +#endif /* SLJIT_CONFIG_RISCV_64 */ case SLJIT_MOV_U8: return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); @@ -1976,7 +2171,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (src2 == SLJIT_IMM) src2w = (sljit_s32)src2w; } -#endif +#endif /* SLJIT_CONFIG_RISCV_64 */ switch (GET_OPCODE(op)) { case SLJIT_ADD: @@ -2204,10 +2399,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, slji if (type == SLJIT_GP_REGISTER) return reg_map[reg]; - if (type != SLJIT_FLOAT_REGISTER) - return -1; + if (type == SLJIT_FLOAT_REGISTER) + return freg_map[reg]; - return freg_map[reg]; + return vreg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -2234,9 +2429,9 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) # define flags (sljit_u32)0 -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; if (src & SLJIT_MEM) { @@ -2250,15 +2445,15 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp if (dst & SLJIT_MEM) { #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) return emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ return emit_op_mem2(compiler, flags ? 
WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0); -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ } return SLJIT_SUCCESS; #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) # undef flags -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ } static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, @@ -2556,9 +2751,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) #define BRANCH_LENGTH ((sljit_ins)(3 * sizeof(sljit_ins)) << 7) -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ #define BRANCH_LENGTH ((sljit_ins)(7 * sizeof(sljit_ins)) << 7) -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ static sljit_ins get_jump_instruction(sljit_s32 type) { @@ -2573,6 +2768,7 @@ static sljit_ins get_jump_instruction(sljit_s32 type) case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: case SLJIT_CARRY: + case SLJIT_ATOMIC_NOT_STORED: case SLJIT_F_EQUAL: case SLJIT_ORDERED_EQUAL: case SLJIT_ORDERED_NOT_EQUAL: @@ -2591,6 +2787,7 @@ static sljit_ins get_jump_instruction(sljit_s32 type) case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_CARRY: + case SLJIT_ATOMIC_STORED: case SLJIT_F_NOT_EQUAL: case SLJIT_UNORDERED_OR_NOT_EQUAL: case SLJIT_UNORDERED_OR_EQUAL: @@ -2687,7 +2884,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0)); + PTR_FAIL_IF(emit_op_mem2(compiler, flags | (src1 == TMP_REG1 ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, 0, 0)); src2 = src2_tmp_reg; } @@ -2825,9 +3022,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 saved_op = op; #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) sljit_s32 mem_type = WORD_DATA; -#else +#else /* !SLJIT_CONFIG_RISCV_32 */ sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? 
(INT_DATA | SIGNED_DATA) : WORD_DATA; -#endif +#endif /* SLJIT_CONFIG_RISCV_32 */ CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); @@ -2862,6 +3059,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co src_r = dst_r; invert ^= 0x1; break; + case SLJIT_ATOMIC_STORED: + case SLJIT_ATOMIC_NOT_STORED: + invert ^= 0x1; + break; } } else { invert = 0; @@ -3066,6 +3267,561 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile #undef TO_ARGW_HI +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + ins = LR | (3 << 12); + break; +#endif /* SLJIT_CONFIG_RISCV_64 */ + case SLJIT_MOV_S32: + case SLJIT_MOV32: + ins = LR | (2 << 12); + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | RD(dst_reg) | RS1(mem_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins; + + /* temp_reg == mem_reg is undefined so use another temp register */ + SLJIT_UNUSED_ARG(temp_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + if (op & SLJIT_ATOMIC_USE_CAS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + ins = SC | (3 << 12); + break; +#endif /* SLJIT_CONFIG_RISCV_64 */ + case SLJIT_MOV_S32: + case 
SLJIT_MOV32: + ins = SC | (2 << 12); + break; + + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | RD(OTHER_FLAG) | RS1(mem_reg) | RS2(src_reg)); +} + +/* + SEW = Selected element width + LMUL = Vector register group multiplier + + VLMUL values (in binary): + 100 : reserved + 101 : 1/8 + 110 : 1/4 + 111 : 1/2 + 000 : 1 + 001 : 2 + 010 : 4 + 011 : 8 +*/ + +static SLJIT_INLINE sljit_s32 sljit_emit_vsetivli(struct sljit_compiler *compiler, sljit_s32 type, sljit_ins vlmul) +{ + sljit_ins elem_size = (sljit_ins)SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins avl = (sljit_ins)1 << (SLJIT_SIMD_GET_REG_SIZE(type) - elem_size); + + return push_inst(compiler, VSETIVLI | RD(TMP_REG1) | (elem_size << 23) | (vlmul << 20) | (avl << 15)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_vsetivli_size(struct sljit_compiler *compiler, sljit_s32 reg_size, sljit_s32 elem_size) +{ + sljit_ins avl = (sljit_ins)1 << (reg_size - elem_size); + return push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (avl << 15)); +} + +static sljit_s32 sljit_emit_vmem(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 elem_size, sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 base = mem & REG_MASK; + + if (elem_size > 0) + ins |= (1 << 14) | ((sljit_ins)elem_size << 12); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + if (SLJIT_UNLIKELY(memw)) { + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(OFFS_REG(mem)) | IMM_I(memw))); + } + + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(base) | RS2(!memw ? 
OFFS_REG(mem) : TMP_REG1))); + return push_inst(compiler, ins | RS1(TMP_REG1)); + } + + if (memw == 0) + return push_inst(compiler, ins | RS1(base)); + + if (memw <= SIMM_MAX && memw >= SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(base) | IMM_I(memw))); + return push_inst(compiler, ins | RS1(TMP_REG1)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, memw, TMP_REG3)); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(base))); + + return push_inst(compiler, ins | RS1(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 vreg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (elem_size > 3) + elem_size = 3; + + FAIL_IF(sljit_emit_vsetivli_size(compiler, reg_size, elem_size)); + + if (srcdst & SLJIT_MEM) { + ins = (type & SLJIT_SIMD_STORE) ? 
VS : VL; + return sljit_emit_vmem(compiler, ins | VRD(vreg), elem_size, srcdst, srcdstw); + } + + if (type & SLJIT_SIMD_STORE) + ins = VRD(srcdst) | VRS1(vreg); + else + ins = VRD(vreg) | VRS1(srcdst); + + return push_inst(compiler, VMV_VV | ins); +} + +static sljit_s32 sljit_simd_get_mem_flags(sljit_s32 elem_size) +{ + switch (elem_size) { + case 0: + return BYTE_DATA; + case 1: + return HALF_DATA; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case 2: + return INT_DATA; +#endif /* SLJIT_CONFIG_RISCV_64 */ + default: + return WORD_DATA; + } +} + +static sljit_sw sljit_simd_get_imm(sljit_s32 elem_size, sljit_sw imm) +{ + switch (elem_size) { + case 0: + return (sljit_s8)imm; + case 1: + return (sljit_s16)imm; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case 2: + return (sljit_s32)imm; +#endif /* SLJIT_CONFIG_RISCV_64 */ + default: + return imm; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 vreg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if ((type & SLJIT_SIMD_FLOAT) ? 
(elem_size < 2 || elem_size > 3) : elem_size > 2) + return SLJIT_ERR_UNSUPPORTED; +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3) + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + FAIL_IF(sljit_emit_vsetivli(compiler, type, 0)); + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, VMV_VI | VRD(vreg) | ((sljit_ins)(srcw & 0x1f) << 15)); + + if (src & SLJIT_MEM) { + flags = (elem_size == 2) ? SINGLE_DATA : DOUBLE_DATA; + FAIL_IF(emit_op_mem(compiler, flags | LOAD_DATA, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + return push_inst(compiler, VFMV_VF | VRD(vreg) | FRS1(src)); + } + + if (src == SLJIT_IMM) { + srcw = sljit_simd_get_imm(elem_size, srcw); + + if (srcw >= -0x10 && srcw <= 0xf) + return push_inst(compiler, VMV_VI | VRD(vreg) | ((sljit_ins)(srcw & 0x1f) << 15)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3)); + src = TMP_REG1; + } else if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, sljit_simd_get_mem_flags(elem_size) | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + return push_inst(compiler, VMV_VX | VRD(vreg) | RS1(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 vreg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if ((type & SLJIT_SIMD_FLOAT) ? 
(elem_size < 2 || elem_size > 3) : elem_size > 2) + return SLJIT_ERR_UNSUPPORTED; +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3) + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (type & SLJIT_SIMD_STORE) { + FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (1 << 15))); + + if (lane_index > 0) { + FAIL_IF(push_inst(compiler, VSLIDEDOWN_VI | VRD(TMP_VREG1) | ((sljit_ins)lane_index << 15) | VRS2(vreg))); + vreg = TMP_VREG1; + } + + if (srcdst & SLJIT_MEM) + return sljit_emit_vmem(compiler, VS | VRD(vreg), elem_size, srcdst, srcdstw); + + if (type & SLJIT_SIMD_FLOAT) + return push_inst(compiler, VFMV_FS | FRD(srcdst) | VRS2(vreg)); + + FAIL_IF(push_inst(compiler, VMV_XS | RD(srcdst) | VRS2(vreg))); + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if ((type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 2) + return SLJIT_SUCCESS; +#else /* !SLJIT_CONFIG_RISCV_32 */ + if ((type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3 || (elem_size == 2 && (type & SLJIT_32))) + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (elem_size == 0) + return push_inst(compiler, ANDI | RD(srcdst) | RS1(srcdst) | IMM_I(0xff)); + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + flags = 16; +#else /* !SLJIT_CONFIG_RISCV_32 */ + flags = (elem_size == 1) ? 
48 : 32; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + FAIL_IF(push_inst(compiler, SLLI | RD(srcdst) | RS1(srcdst) | IMM_I(flags))); + return push_inst(compiler, SRLI | RD(srcdst) | RS1(srcdst) | IMM_I(flags)); + } + + if (type & SLJIT_SIMD_LANE_ZERO) { + FAIL_IF(sljit_emit_vsetivli(compiler, type, 0)); + FAIL_IF(push_inst(compiler, VMV_VI | VRD(vreg))); + } + + if (srcdst & SLJIT_MEM) { + FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (1 << 15))); + FAIL_IF(sljit_emit_vmem(compiler, VL | VRD(lane_index > 0 ? TMP_VREG1 : vreg), elem_size, srcdst, srcdstw)); + + if (lane_index == 0) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | ((sljit_ins)(lane_index + 1) << 15))); + return push_inst(compiler, VSLIDEUP_VI | VRD(vreg) | ((sljit_ins)lane_index << 15) | VRS2(TMP_VREG1)); + } + + if (!(type & SLJIT_SIMD_LANE_ZERO) || lane_index > 0) + FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | ((sljit_ins)(lane_index + 1) << 15))); + + if (type & SLJIT_SIMD_FLOAT) { + FAIL_IF(push_inst(compiler, VFMV_SF | VRD(lane_index > 0 ? TMP_VREG1 : vreg) | FRS1(srcdst))); + + if (lane_index == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, VSLIDEUP_VI | VRD(vreg) | ((sljit_ins)lane_index << 15) | VRS2(TMP_VREG1)); + } + + if (srcdst == SLJIT_IMM) { + srcdstw = sljit_simd_get_imm(elem_size, srcdstw); + FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw, TMP_REG3)); + srcdst = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, VMV_SX | VRD(lane_index > 0 ? 
TMP_VREG1 : vreg) | RS1(srcdst))); + + if (lane_index == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, VSLIDEUP_VI | VRD(vreg) | ((sljit_ins)lane_index << 15) | VRS2(TMP_VREG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 vreg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + FAIL_IF(sljit_emit_vsetivli(compiler, type, 0)); + + FAIL_IF(push_inst(compiler, VRGATHER_VI | VRD(vreg != src ? vreg : TMP_VREG1) | ((sljit_ins)src_lane_index << 15) | VRS2(src))); + if (vreg == src) + return push_inst(compiler, VMV_VV | VRD(vreg) | VRS1(TMP_VREG1)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 vreg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if ((type & SLJIT_SIMD_FLOAT) ? 
(elem_size < 2 || elem_size > 3) : elem_size > 2) + return SLJIT_ERR_UNSUPPORTED; +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3) + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if ((src & SLJIT_MEM) || vreg == src) { + ins = (sljit_ins)1 << (reg_size - elem2_size); + FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (ins << 15))); + + if (src & SLJIT_MEM) + FAIL_IF(sljit_emit_vmem(compiler, VL | VRD(TMP_VREG1), elem_size, src, srcw)); + else + FAIL_IF(push_inst(compiler, VMV_VV | VRD(TMP_VREG1) | VRS1(src))); + + src = TMP_VREG1; + } + + if (type & SLJIT_SIMD_FLOAT) { + FAIL_IF(sljit_emit_vsetivli(compiler, type, 0x7)); + return push_inst(compiler, VFWCVT_FFV | VRD(vreg) | VRS2(src)); + } + + ins = (sljit_ins)1 << (reg_size - elem2_size); + FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem2_size << 23) | (ins << 15))); + + switch (elem2_size - elem_size) { + case 1: + ins = VZEXT_VF2; + break; + case 2: + ins = VZEXT_VF4; + break; + default: + ins = VZEXT_VF8; + break; + } + + if (type & SLJIT_SIMD_EXTEND_SIGNED) + ins |= 1 << 15; + + return push_inst(compiler, ins | VRD(vreg) | VRS2(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 vreg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3) + return SLJIT_ERR_UNSUPPORTED; + + FAIL_IF(sljit_emit_vsetivli(compiler, type, 0)); + FAIL_IF(push_inst(compiler, VMV_VI | VRD(TMP_VREG1) | (0x0 << 15))); + FAIL_IF(push_inst(compiler, VMSLE_VI | VRD(TMP_VREG1) | (0x0 << 15) | VRS2(vreg))); + + FAIL_IF(sljit_emit_vsetivli_size(compiler, 2, 2)); + FAIL_IF(push_inst(compiler, VMV_XS | RD(dst_r) | VRS2(TMP_VREG1))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, (type & SLJIT_32) ? INT_DATA : WORD_DATA, dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); + + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = VAND_VV; + break; + case SLJIT_SIMD_OP2_OR: + ins = VOR_VV; + break; + case SLJIT_SIMD_OP2_XOR: + ins = VXOR_VV; + break; + case SLJIT_SIMD_OP2_SHUFFLE: + ins = VRGATHER_VV; + elem_size = 0; + break; + } + + if (elem_size > 3) + elem_size = 3; + + FAIL_IF(sljit_emit_vsetivli_size(compiler, reg_size, elem_size)); + + if (src2 & SLJIT_MEM) { + FAIL_IF(sljit_emit_vmem(compiler, VL | VRD(TMP_VREG1), elem_size, src2, src2w)); + src2 = TMP_VREG1; + } + + if 
(SLJIT_SIMD_GET_OPCODE(type) != SLJIT_SIMD_OP2_SHUFFLE) + return push_inst(compiler, ins | VRD(dst_vreg) | VRS1(src1_vreg) | VRS2(src2)); + + if (dst_vreg == src2) { + FAIL_IF(push_inst(compiler, VMV_VV | VRD(TMP_VREG1) | VRS1(src2))); + src2 = TMP_VREG1; + } + + if (dst_vreg == src1_vreg) { + FAIL_IF(push_inst(compiler, VMV_VV | VRD(TMP_VREG2) | VRS1(src1_vreg))); + src1_vreg = TMP_VREG2; + } + + return push_inst(compiler, ins | VRD(dst_vreg) | VRS1(src2) | VRS2(src1_vreg)); +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; diff --git a/src/sljit/sljitNativeS390X.c b/deps/sljit/sljit_src/sljitNativeS390X.c similarity index 96% rename from src/sljit/sljitNativeS390X.c rename to deps/sljit/sljit_src/sljitNativeS390X.c index 99e8463..7ce9f9f 100644 --- a/src/sljit/sljitNativeS390X.c +++ b/deps/sljit/sljit_src/sljitNativeS390X.c @@ -1638,6 +1638,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F64: case SLJIT_HAS_SIMD: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; case SLJIT_HAS_CTZ: @@ -1660,19 +1661,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) /* --------------------------------------------------------------------- */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_s32 offset, i, tmp; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, 
options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); /* Saved registers are stored in callee allocated save area. */ SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13); + scratches = ENTER_GET_REGS(scratches); + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + offset = 2 * SSIZE_OF(sw); if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { if (saved_arg_count == 0) { @@ -1756,12 +1764,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; return SLJIT_SUCCESS; @@ -1923,7 +1931,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return SLJIT_SUCCESS; case SLJIT_DIV_S32: case SLJIT_DIVMOD_S32: - FAIL_IF(push_inst(compiler, lhi(tmp0, 0))); + FAIL_IF(push_inst(compiler, 0xeb00000000dc /* srak */ | R36A(tmp0) | R32A(arg0) | (31 << 16))); FAIL_IF(push_inst(compiler, lr(tmp1, arg0))); FAIL_IF(push_inst(compiler, dr(tmp0, 
arg1))); FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */ @@ -1950,6 +1958,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */ return SLJIT_SUCCESS; + case SLJIT_MEMORY_BARRIER: + return push_inst(compiler, 0x0700 /* bcr */ | (0xe << 4) | 0); case SLJIT_ENDBR: return SLJIT_SUCCESS; case SLJIT_SKIP_FRAMES_BEFORE_RETURN: @@ -2475,14 +2485,9 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } - } - else { - if ((op & SLJIT_32) || is_u32(src2w)) { - ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; - return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); - } - if (is_s16(src2w)) - return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w); + } else if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 
0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } } else if (src2 & SLJIT_MEM) { @@ -3182,7 +3187,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, slji if (type == SLJIT_GP_REGISTER) return (sljit_s32)gpr(reg); - if (type != SLJIT_FLOAT_REGISTER) + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128) return -1; return (sljit_s32)freg_map[reg]; @@ -3934,7 +3939,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3944,7 +3949,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3959,15 +3964,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co if (!(srcdst & SLJIT_MEM)) { if (type & SLJIT_SIMD_STORE) - ins = F36(srcdst) | F32(freg); + ins = F36(srcdst) | F32(vreg); else - ins = F36(freg) | F32(srcdst); + ins = F36(vreg) | F32(srcdst); return push_inst(compiler, 0xe70000000056 /* vlr */ | ins); } FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1)); - ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); if (alignment >= 4) ins |= 4 << 12; @@ -3978,7 +3983,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, 
sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3988,7 +3993,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil sljit_sw sign_ext; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -4003,15 +4008,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (src & SLJIT_MEM) { FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); - return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg) + return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12)); } if (type & SLJIT_SIMD_FLOAT) { if (src == SLJIT_IMM) - return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)); + return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)); - return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12)); + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src) | ((sljit_ins)elem_size << 12)); } if (src == SLJIT_IMM) { @@ -4043,10 +4048,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (sign_ext != 0x10000) { if (sign_ext == 0 || sign_ext == -1) - return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg) + return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg) | (sign_ext == 0 ? 
0 : ((sljit_ins)0xffff << 16))); - return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg) + return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(vreg) | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12)); } @@ -4055,12 +4060,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil } else reg = gpr(src); - FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12))); - return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12)); + FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ((sljit_ins)elem_size << 12))); + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(vreg) | ((sljit_ins)elem_size << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4070,7 +4075,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile sljit_ins ins = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -4085,20 +4090,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (srcdst & SLJIT_MEM) { FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1)); - ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); } if (type & SLJIT_SIMD_LANE_ZERO) { if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1)) return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 
12)); - if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { - FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg))); + if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) { + FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(vreg))); srcdst = TMP_FREG1; srcdstw = 0; } - FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg))); + FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg))); } if (srcdst & SLJIT_MEM) { @@ -4126,19 +4131,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (type & SLJIT_SIMD_FLOAT) { if (type & SLJIT_SIMD_STORE) - return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12)); + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(vreg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12)); if (elem_size == 3) { if (lane_index == 0) - ins = F32(srcdst) | F28(freg) | (1 << 12); + ins = F32(srcdst) | F28(vreg) | (1 << 12); else - ins = F32(freg) | F28(srcdst); + ins = F32(vreg) | F28(srcdst); - return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins); + return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(vreg) | ins); } FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12))); - return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12)); + return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12)); } if (srcdst == SLJIT_IMM) { @@ -4167,7 +4172,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } if (ins != 0) - return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12)); + return push_inst(compiler, ins | F36(vreg) | 
((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12)); push_load_imm_inst(compiler, tmp0, srcdstw); reg = tmp0; @@ -4177,9 +4182,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12); if (!(type & SLJIT_SIMD_STORE)) - return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins); + return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ins); - FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins)); + FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(vreg) | ins)); if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3) return SLJIT_SUCCESS; @@ -4200,14 +4205,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); if (reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -4218,12 +4223,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; - return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src) | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = 
SLJIT_SIMD_GET_REG_SIZE(type); @@ -4233,7 +4238,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler sljit_ins ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -4248,7 +4253,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler if (src & SLJIT_MEM) { FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); - ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); switch (elem2_size - elem_size) { case 1: @@ -4263,27 +4268,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler } FAIL_IF(push_inst(compiler, ins)); - src = freg; + src = vreg; } if (type & SLJIT_SIMD_FLOAT) { - FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12))); - FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12))); - return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12)); + FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(vreg) | F32(src) | (2 << 12))); + FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(vreg) | F32(vreg) | (32 << 16) | (3 << 12))); + return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(vreg) | F32(vreg) | (2 << 12)); } - ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg); + ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 
0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(vreg); do { FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12))); - src = freg; + src = vreg; } while (++elem_size < elem2_size); return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4291,7 +4296,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c sljit_gpr dst_r; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); @@ -4324,7 +4329,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c if (elem_size != 0) FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12))); - FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1))); + FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(vreg) | F28(TMP_FREG1))); dst_r = FAST_IS_REG(dst) ? 
gpr(dst) : tmp0; FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1) @@ -4337,14 +4342,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); - sljit_ins ins = 0; + sljit_s32 alignment; + struct addr addr; + sljit_ins ins = 0, load_ins; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); + ADJUST_LOCAL_OFFSET(src2, src2w); if (reg_size != 4) return SLJIT_ERR_UNSUPPORTED; @@ -4365,12 +4373,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co case SLJIT_SIMD_OP2_XOR: ins = 0xe7000000006d /* vx */; break; + case SLJIT_SIMD_OP2_SHUFFLE: + ins = 0xe7000000008c /* vperm */; + break; } - if (type & SLJIT_SIMD_TEST) - return SLJIT_SUCCESS; + if (src2 & SLJIT_MEM) { + FAIL_IF(make_addr_bx(compiler, &addr, src2, src2w, tmp1)); + load_ins = 0xe70000000006 /* vl */ | F36(TMP_FREG1) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + + if (alignment >= 4) + load_ins |= 4 << 12; + else if (alignment == 3) + load_ins |= 3 << 12; + + FAIL_IF(push_inst(compiler, load_ins)); + src2 = TMP_FREG1; + } + + if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) + return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src1_vreg) | F12(src2)); - return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg)); + return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src2)); } SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, @@ -4380,8 +4405,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); - SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); + if (op & SLJIT_ATOMIC_USE_LS) + return SLJIT_ERR_UNSUPPORTED; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + case SLJIT_MOV: + case SLJIT_MOV_P: + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); + default: + return SLJIT_ERR_UNSUPPORTED; + } } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, @@ -4389,44 +4428,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler sljit_s32 mem_reg, sljit_s32 temp_reg) { - sljit_ins mask; + sljit_ins ins; sljit_gpr tmp_r = gpr(temp_reg); sljit_gpr mem_r = gpr(mem_reg); CHECK_ERROR(); CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + if (op & SLJIT_ATOMIC_USE_LS) + return SLJIT_ERR_UNSUPPORTED; + switch (GET_OPCODE(op)) { case SLJIT_MOV32: case SLJIT_MOV_U32: - return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r)); - case SLJIT_MOV_U8: - mask = 0xff; + ins = 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r); break; - case SLJIT_MOV_U16: - mask = 0xffff; + case SLJIT_MOV: + case SLJIT_MOV_P: + ins = 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r); break; default: - return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r)); + return SLJIT_ERR_UNSUPPORTED; } - /* tmp0 = (src_reg ^ tmp_r) & mask */ - FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | 
R20A(tmp1) | mask)); - FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r))); - FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc)); - FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1))); - - /* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */ - FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10))); - FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r))); - FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8))); - FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1))); - - /* Already computed: tmp_r = mem_r & ~0x3 */ + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r))); - FAIL_IF(push_inst(compiler, 0x1700 /* x */ | R4A(tmp0) | R0A(tmp1))); - return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r)); + return push_inst(compiler, ins); } /* --------------------------------------------------------------------- */ diff --git a/src/sljit/sljitNativeX86_32.c b/deps/sljit/sljit_src/sljitNativeX86_32.c similarity index 98% rename from src/sljit/sljitNativeX86_32.c rename to deps/sljit/sljit_src/sljitNativeX86_32.c index 59ea04a..217a149 100644 --- a/src/sljit/sljitNativeX86_32.c +++ b/deps/sljit/sljit_src/sljitNativeX86_32.c @@ -311,8 +311,8 @@ static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_pt #define ENTER_TMP_TO_S 0x00002 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { sljit_s32 
word_arg_count, saved_arg_count, float_arg_count; sljit_s32 size, args_size, types, status; @@ -323,8 +323,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); /* Emit ENDBR32 at function entry if needed. */ FAIL_IF(emit_endbranch(compiler)); @@ -536,14 +538,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { sljit_s32 args_size; CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); arg_types >>= SLJIT_ARG_SHIFT; args_size = 0; diff --git a/src/sljit/sljitNativeX86_64.c b/deps/sljit/sljit_src/sljitNativeX86_64.c similarity index 98% rename from src/sljit/sljitNativeX86_64.c rename to deps/sljit/sljit_src/sljitNativeX86_64.c index 1ab7929..e4d3db8 100644 --- a/src/sljit/sljitNativeX86_64.c +++ 
b/deps/sljit/sljit_src/sljitNativeX86_64.c @@ -454,14 +454,16 @@ typedef struct { #endif /* _WIN64 */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { sljit_uw size; sljit_s32 word_arg_count = 0; sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_s32 saved_regs_size, tmp, i; #ifdef _WIN64 + sljit_s32 fscratches; + sljit_s32 fsaveds; sljit_s32 saved_float_regs_size; sljit_s32 saved_float_regs_offset = 0; sljit_s32 float_arg_count = 0; @@ -469,8 +471,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_u8 *inst; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); +#ifdef _WIN64 + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; +#endif /* _WIN64 */ if (options & SLJIT_ENTER_REG_ARG) arg_types = 0; @@ -630,19 +639,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) + sljit_s32 options, sljit_s32 arg_types, + sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size) { sljit_s32 saved_regs_size; #ifdef _WIN64 + sljit_s32 
fscratches; + sljit_s32 fsaveds; sljit_s32 saved_float_regs_size; #endif /* _WIN64 */ CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size); + + scratches = ENTER_GET_REGS(scratches); #ifdef _WIN64 + saveds = ENTER_GET_REGS(saveds); + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + local_size += SLJIT_LOCALS_OFFSET; saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg); diff --git a/src/sljit/sljitNativeX86_common.c b/deps/sljit/sljit_src/sljitNativeX86_common.c similarity index 90% rename from src/sljit/sljitNativeX86_common.c rename to deps/sljit/sljit_src/sljitNativeX86_common.c index ecb7e9b..9f599d5 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/deps/sljit/sljit_src/sljitNativeX86_common.c @@ -239,6 +239,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { #define MOVDDUP_x_xm 0x12 #define MOVDQA_x_xm 0x6f #define MOVDQA_xm_x 0x7f +#define MOVDQU_x_xm 0x6f #define MOVHLPS_x_x 0x12 #define MOVHPD_m_x 0x17 #define MOVHPD_x_m 0x16 @@ -398,6 +399,13 @@ static sljit_u32 cpu_feature_list = 0; #include #elif defined(_MSC_VER) && _MSC_VER >= 1400 #include +#elif defined(__INTEL_COMPILER) +#include +#endif + +#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \ + || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__)) +#include #endif /******************************************************/ @@ -425,49 +433,20 @@ static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) static void execute_cpu_id(sljit_u32 info[4]) { -#if defined(_MSC_VER) && _MSC_VER >= 1400 +#if (defined(_MSC_VER) && _MSC_VER >= 
1400) \ + || (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 2021 && __INTEL_COMPILER_UPDATE >= 7) __cpuidex((int*)info, (int)info[0], (int)info[2]); -#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__) +#elif (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900) - /* AT&T syntax. */ - __asm__ ( -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - "movl %0, %%esi\n" - "movl (%%esi), %%eax\n" - "movl 8(%%esi), %%ecx\n" - "pushl %%ebx\n" - "cpuid\n" - "movl %%eax, (%%esi)\n" - "movl %%ebx, 4(%%esi)\n" - "popl %%ebx\n" - "movl %%ecx, 8(%%esi)\n" - "movl %%edx, 12(%%esi)\n" -#else /* !SLJIT_CONFIG_X86_32 */ - "movq %0, %%rsi\n" - "movl (%%rsi), %%eax\n" - "movl 8(%%rsi), %%ecx\n" - "cpuid\n" - "movl %%eax, (%%rsi)\n" - "movl %%ebx, 4(%%rsi)\n" - "movl %%ecx, 8(%%rsi)\n" - "movl %%edx, 12(%%rsi)\n" -#endif /* SLJIT_CONFIG_X86_32 */ - : - : "r" (info) -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - : "memory", "eax", "ecx", "edx", "esi" -#else /* !SLJIT_CONFIG_X86_32 */ - : "memory", "rax", "rbx", "rcx", "rdx", "rsi" -#endif /* SLJIT_CONFIG_X86_32 */ - ); + __get_cpuid_count(info[0], info[2], info, info + 1, info + 2, info + 3); -#else /* _MSC_VER < 1400 */ +#elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) \ + && (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) /* Intel syntax. 
*/ __asm { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) mov esi, info mov eax, [esi] mov ecx, [esi + 8] @@ -476,30 +455,48 @@ static void execute_cpu_id(sljit_u32 info[4]) mov [esi + 4], ebx mov [esi + 8], ecx mov [esi + 12], edx -#else /* !SLJIT_CONFIG_X86_32 */ - mov rsi, info - mov eax, [rsi] - mov ecx, [rsi + 8] - cpuid - mov [rsi], eax - mov [rsi + 4], ebx - mov [rsi + 8], ecx - mov [rsi + 12], edx -#endif /* SLJIT_CONFIG_X86_32 */ } -#endif /* _MSC_VER && _MSC_VER >= 1400 */ +#else + + __asm__ __volatile__ ( + "cpuid\n" + : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3]) + : "0" (info[0]), "2" (info[2]) + ); + +#endif } static sljit_u32 execute_get_xcr0_low(void) { sljit_u32 xcr0; -#if defined(_MSC_VER) && _MSC_VER >= 1400 +#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \ + || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__)) xcr0 = (sljit_u32)_xgetbv(0); -#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__) +#elif defined(__TINYC__) + + __asm__ ( + "xorl %%ecx, %%ecx\n" + ".byte 0x0f\n" + ".byte 0x01\n" + ".byte 0xd0\n" + : "=a" (xcr0) + : +#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 + : "ecx", "edx" +#else /* !SLJIT_CONFIG_X86_32 */ + : "rcx", "rdx" +#endif /* SLJIT_CONFIG_X86_32 */ + ); + +#elif (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20220100) \ + || (defined(__clang__) && __clang_major__ < 14) \ + || (defined(__GNUC__) && __GNUC__ < 3) \ + || defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* AT&T syntax. */ __asm__ ( @@ -507,23 +504,37 @@ static sljit_u32 execute_get_xcr0_low(void) "xgetbv\n" : "=a" (xcr0) : -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 : "ecx", "edx" #else /* !SLJIT_CONFIG_X86_32 */ : "rcx", "rdx" #endif /* SLJIT_CONFIG_X86_32 */ ); -#else /* _MSC_VER < 1400 */ +#elif defined(_MSC_VER) /* Intel syntax. 
*/ __asm { - mov ecx, 0 + xor ecx, ecx xgetbv mov xcr0, eax } -#endif /* _MSC_VER && _MSC_VER >= 1400 */ +#else + + __asm__ ( + "xor{l %%ecx, %%ecx | ecx, ecx}\n" + "xgetbv\n" + : "=a" (xcr0) + : +#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 + : "ecx", "edx" +#else /* !SLJIT_CONFIG_X86_32 */ + : "rcx", "rdx" +#endif /* SLJIT_CONFIG_X86_32 */ + ); + +#endif return xcr0; } @@ -549,6 +560,10 @@ static void get_cpu_features(void) if (max_id >= 1) { info[0] = 1; +#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32 + /* Winchip 2 and Cyrix MII bugs */ + info[1] = info[2] = 0; +#endif execute_cpu_id(info); if (info[2] & 0x80000) @@ -565,11 +580,17 @@ static void get_cpu_features(void) feature_list |= CPU_FEATURE_CMOV; } - info[0] = 0x80000001; + info[0] = 0x80000000; execute_cpu_id(info); + max_id = info[0]; + + if (max_id >= 0x80000001) { + info[0] = 0x80000001; + execute_cpu_id(info); - if (info[2] & 0x20) - feature_list |= CPU_FEATURE_LZCNT; + if (info[2] & 0x20) + feature_list |= CPU_FEATURE_LZCNT; + } if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0) feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2); @@ -659,18 +680,23 @@ static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_p sljit_uw type = jump->flags >> TYPE_SHIFT; sljit_s32 short_jump; sljit_uw label_addr; + sljit_uw jump_addr; - if (jump->flags & JUMP_ADDR) - label_addr = jump->u.target - (sljit_uw)executable_offset; - else + jump_addr = (sljit_uw)code_ptr; + if (!(jump->flags & JUMP_ADDR)) { label_addr = (sljit_uw)(code + jump->u.label->size); + if (jump->u.label->size > jump->addr) + jump_addr = (sljit_uw)(code + jump->addr); + } else + label_addr = jump->u.target - (sljit_uw)executable_offset; + #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((sljit_sw)(label_addr - (sljit_uw)(code_ptr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 5)) < HALFWORD_MIN) + if ((sljit_sw)(label_addr - 
(jump_addr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump_addr + 5)) < HALFWORD_MIN) return detect_far_jump_type(jump, code_ptr); #endif /* SLJIT_CONFIG_X86_64 */ - short_jump = (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) >= -0x80 && (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) <= 0x7f; + short_jump = (sljit_sw)(label_addr - (jump_addr + 2)) >= -0x80 && (sljit_sw)(label_addr - (jump_addr + 2)) <= 0x7f; if (type == SLJIT_JUMP) { if (short_jump) @@ -792,6 +818,7 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (next_min_addr != next_jump_addr) continue; + jump->addr -= size_reduce; if (!(jump->flags & JUMP_MOV_ADDR)) { #if (defined SLJIT_DEBUG && SLJIT_DEBUG) size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE); @@ -805,7 +832,11 @@ static void reduce_code_size(struct sljit_compiler *compiler) #endif /* SLJIT_CONFIG_X86_64 */ } else { /* Unit size: instruction. */ - diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce); + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } type = jump->flags >> TYPE_SHIFT; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -845,7 +876,7 @@ static void reduce_code_size(struct sljit_compiler *compiler) #endif /* SLJIT_DEBUG */ if (!(jump->flags & JUMP_ADDR)) { - diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce - 3); + diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - 3); if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) size_reduce += 3; @@ -1017,6 +1048,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_COPY_F32: case SLJIT_HAS_COPY_F64: case SLJIT_HAS_ATOMIC: + case SLJIT_HAS_MEMORY_BARRIER: return 1; #if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE @@ 
-1476,6 +1508,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #endif break; + case SLJIT_MEMORY_BARRIER: + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + inst[0] = GROUP_0F; + inst[1] = 0xae; + inst[2] = 0xf0; + return SLJIT_SUCCESS; case SLJIT_ENDBR: return emit_endbranch(compiler); case SLJIT_SKIP_FRAMES_BEFORE_RETURN: @@ -3466,6 +3506,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_u8 cond_set; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_s32 reg; + sljit_uw size; #endif /* !SLJIT_CONFIG_X86_64 */ /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ sljit_s32 dst_save = dst; @@ -3482,35 +3523,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); + size = 3 + 2; + if (reg_map[TMP_REG1] >= 4) + size += 1 + 1; + else if (reg_map[dst] >= 4) + size++; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); - INC_SIZE(4 + 3); + INC_SIZE(size); /* Set low register to conditional flag. */ - inst[0] = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; - inst[1] = GROUP_0F; - inst[2] = cond_set; - inst[3] = MOD_REG | reg_lmap[TMP_REG1]; - inst[4] = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B)); - inst[5] = OR_rm8_r8; - inst[6] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); + if (reg_map[TMP_REG1] >= 4) + *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; + + inst[0] = GROUP_0F; + inst[1] = cond_set; + inst[2] = MOD_REG | reg_lmap[TMP_REG1]; + inst += 3; + + if (reg_map[TMP_REG1] >= 4 || reg_map[dst] >= 4) + *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 
0 : REX_B)); + + inst[0] = OR_rm8_r8; + inst[1] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); return SLJIT_SUCCESS; } reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1; - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); + size = 3 + (reg_map[reg] >= 4) + 4; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); - INC_SIZE(4 + 4); + INC_SIZE(size); /* Set low register to conditional flag. */ - inst[0] = (reg_map[reg] <= 7) ? REX : REX_B; - inst[1] = GROUP_0F; - inst[2] = cond_set; - inst[3] = MOD_REG | reg_lmap[reg]; - inst[4] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + + if (reg_map[reg] >= 4) + *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; + + inst[0] = GROUP_0F; + inst[1] = cond_set; + inst[2] = MOD_REG | reg_lmap[reg]; + + inst[3] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); /* The movzx instruction does not affect flags. */ - inst[5] = GROUP_0F; - inst[6] = MOVZX_r_rm8; - inst[7] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); + inst[4] = GROUP_0F; + inst[5] = MOVZX_r_rm8; + inst[6] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); if (reg != TMP_REG1) return SLJIT_SUCCESS; @@ -3617,7 +3675,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3626,7 +3684,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co sljit_uw op; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -3670,13 +3728,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *co return SLJIT_SUCCESS; if ((op & VEX_256) || ((cpu_feature_list & 
CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) - return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw); + return emit_vex_instruction(compiler, op, vreg, 0, srcdst, srcdstw); - return emit_groupf(compiler, op, freg, srcdst, srcdstw); + return emit_groupf(compiler, op, vreg, srcdst, srcdstw); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3687,7 +3745,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil sljit_uw op; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -3753,48 +3811,48 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (elem_size >= 3) compiler->mode32 = 0; #endif /* SLJIT_CONFIG_X86_64 */ - FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw)); + FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw)); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; #endif /* SLJIT_CONFIG_X86_64 */ - src = freg; + src = vreg; srcw = 0; } if (reg_size == 5) op |= VEX_256; - return emit_vex_instruction(compiler, op, freg, 0, src, srcw); + return emit_vex_instruction(compiler, op, vreg, 0, src, srcw); } } if (type & SLJIT_SIMD_FLOAT) { if (src == SLJIT_IMM) { if (use_vex) - return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? 
EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0); - return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0); + return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, vreg, vreg, 0); } SLJIT_ASSERT(reg_size == 4); if (use_vex) { if (elem_size == 3) - return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, srcw); + return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, srcw); SLJIT_ASSERT(!(src & SLJIT_MEM)); - FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0)); return emit_byte(compiler, 0); } - if (elem_size == 2 && freg != src) { - FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw)); - src = freg; + if (elem_size == 2 && vreg != src) { + FAIL_IF(emit_sse2_load(compiler, 1, vreg, src, srcw)); + src = vreg; srcw = 0; } op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2; - FAIL_IF(emit_groupf(compiler, op, freg, src, srcw)); + FAIL_IF(emit_groupf(compiler, op, vreg, src, srcw)); if (elem_size == 2) return emit_byte(compiler, 0); @@ -3820,9 +3878,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (srcw == 0 || srcw == -1) { if (use_vex) - return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0); - return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); + return emit_groupf(compiler, (srcw == 0 ? 
PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0); } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -3864,11 +3922,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil if (use_vex) { if (opcode != MOVD_x_rm) { op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode; - FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, freg, src, srcw)); + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, vreg, src, srcw)); } else - FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw)); + FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw)); } else { - inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw); + inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw); FAIL_IF(!inst); inst[0] = GROUP_0F; inst[1] = opcode; @@ -3879,13 +3937,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil } } - if (use_vex && elem_size >= 2) { + if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && elem_size >= 2) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) op = VPBROADCASTD_x_xm; #else /* !SLJIT_CONFIG_X86_32 */ op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm; #endif /* SLJIT_CONFIG_X86_32 */ - return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + return emit_vex_instruction(compiler, op | ((reg_size == 5) ? 
VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0); } SLJIT_ASSERT(reg_size == 4); @@ -3897,37 +3955,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compil case 0: if (use_vex) { FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); - return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, TMP_FREG, 0); + return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, TMP_FREG, 0); } FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); - return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); + return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0); case 1: if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, vreg, 0)); else - FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0)); + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, vreg, 0)); FAIL_IF(emit_byte(compiler, 0)); /* fallthrough */ default: if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0)); else - FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0)); return emit_byte(compiler, 0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) case 3: compiler->mode32 = 1; if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | 
EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0)); else - FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0)); return emit_byte(compiler, 0x44); #endif /* SLJIT_CONFIG_X86_64 */ } } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 vreg, sljit_s32 lane_index, sljit_s32 srcdst, sljit_sw srcdstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -3936,7 +3994,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile sljit_u8 *inst; sljit_u8 opcode = 0; sljit_uw op; - sljit_s32 freg_orig = freg; + sljit_s32 vreg_orig = vreg; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 srcdst_is_ereg = 0; sljit_s32 srcdst_orig = 0; @@ -3944,7 +4002,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile #endif /* SLJIT_CONFIG_X86_32 */ CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw)); ADJUST_LOCAL_OFFSET(srcdst, srcdstw); @@ -4004,29 +4062,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (elem_size == 2) { if (use_vex) - return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); - return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw); + return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw); + return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, vreg, srcdst, srcdstw); } } else if (srcdst & SLJIT_MEM) { 
SLJIT_ASSERT(elem_size == 2 || elem_size == 3); if (use_vex) - return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw); - return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw); + return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, 0, srcdst, srcdstw); + return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, srcdst, srcdstw); } else if (elem_size == 3) { if (use_vex) - return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0); - return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0); + return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, 0, srcdst, 0); + return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, srcdst, 0); } else if (use_vex) { FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); - return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, freg, TMP_FREG, srcdst, 0); + return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, vreg, TMP_FREG, srcdst, 0); } } if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { - freg = TMP_FREG; + vreg = TMP_FREG; lane_index -= (1 << (4 - elem_size)); - } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + } else if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) { if (use_vex) FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw)); else @@ -4039,14 +4097,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile | ((type & SLJIT_SIMD_FLOAT) ? 
XORPD_x_xm : PXOR_x_xm) | EX86_SSE2; if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, freg, freg, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, vreg, vreg, vreg, 0)); else - FAIL_IF(emit_groupf(compiler, op, freg, freg, 0)); + FAIL_IF(emit_groupf(compiler, op, vreg, vreg, 0)); } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { - FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0)); + FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0)); FAIL_IF(emit_byte(compiler, 1)); - freg = TMP_FREG; + vreg = TMP_FREG; lane_index -= (1 << (4 - elem_size)); } @@ -4059,55 +4117,55 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m; /* VEX prefix clears upper bits of the target register. */ - if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || freg == TMP_FREG)) + if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || vreg == TMP_FREG)) FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2 - | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), freg, (type & SLJIT_SIMD_STORE) ? 0 : freg, srcdst, srcdstw)); + | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), vreg, (type & SLJIT_SIMD_STORE) ? 0 : vreg, srcdst, srcdstw)); else - FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw)); + FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, vreg, srcdst, srcdstw)); - /* In case of store, freg is not TMP_FREG. */ + /* In case of store, vreg is not TMP_FREG. 
*/ } else if (type & SLJIT_SIMD_STORE) { if (lane_index == 1) { if (use_vex) - return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0); - return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0); + return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0); + return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, vreg, 0); } if (use_vex) - return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0); - return emit_sse2_load(compiler, 0, srcdst, freg, 0); - } else if (use_vex && (reg_size == 4 || freg == TMP_FREG)) { + return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0); + return emit_sse2_load(compiler, 0, srcdst, vreg, 0); + } else if (use_vex && (reg_size == 4 || vreg == TMP_FREG)) { if (lane_index == 1) - FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0)); + FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0)); else - FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0)); + FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0)); } else { if (lane_index == 1) - FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0)); + FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, vreg, srcdst, 0)); else - FAIL_IF(emit_sse2_load(compiler, 0, freg, srcdst, 0)); + FAIL_IF(emit_sse2_load(compiler, 0, vreg, srcdst, 0)); } } else if (type & SLJIT_SIMD_STORE) { if (lane_index == 0) { if (use_vex) - return emit_vex_instruction(compiler, ((srcdst & SLJIT_MEM) ? MOVSD_xm_x : MOVSD_x_xm) | EX86_PREF_F3 | EX86_SSE2 - | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), freg, ((srcdst & SLJIT_MEM) ? 
0 : freg), srcdst, srcdstw); - return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg); + return emit_vex_instruction(compiler, MOVSD_xm_x | EX86_PREF_F3 | EX86_SSE2 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), + vreg, ((srcdst & SLJIT_MEM) ? 0 : srcdst), srcdst, srcdstw); + return emit_sse2_store(compiler, 1, srcdst, srcdstw, vreg); } if (srcdst & SLJIT_MEM) { if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, srcdst, srcdstw)); + FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, srcdst, srcdstw)); else - FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw)); return emit_byte(compiler, U8(lane_index)); } if (use_vex) { - FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, vreg, vreg, 0)); return emit_byte(compiler, U8(lane_index)); } - if (srcdst == freg) + if (srcdst == vreg) op = SHUFPS_x_xm | EX86_SSE2; else { switch (lane_index) { @@ -4124,7 +4182,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } } - FAIL_IF(emit_groupf(compiler, op, srcdst, freg, 0)); + FAIL_IF(emit_groupf(compiler, op, srcdst, vreg, 0)); op &= 0xff; if (op == SHUFPS_x_xm || op == PSHUFD_x_xm) @@ -4133,23 +4191,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile return SLJIT_SUCCESS; } else { if (lane_index != 0 || (srcdst & SLJIT_MEM)) { - FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw)); 
FAIL_IF(emit_byte(compiler, U8(lane_index << 4))); } else - FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst)); + FAIL_IF(emit_sse2_store(compiler, 1, vreg, 0, srcdst)); } - if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE)) + if (vreg != TMP_FREG || (type & SLJIT_SIMD_STORE)) return SLJIT_SUCCESS; SLJIT_ASSERT(reg_size == 5); if (type & SLJIT_SIMD_LANE_ZERO) { - FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0)); + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0)); return emit_byte(compiler, 0x4e); } - FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0)); + FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0)); return emit_byte(compiler, 1); } @@ -4186,9 +4244,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile if (use_vex && (type & SLJIT_SIMD_STORE)) { op = opcode | ((op == 3) ? 
VEX_OP_0F3A : 0); - FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, 0, srcdst, srcdstw)); + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, 0, srcdst, srcdstw)); } else { - inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw); FAIL_IF(!inst); inst[0] = GROUP_0F; @@ -4202,15 +4260,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile FAIL_IF(emit_byte(compiler, U8(lane_index))); if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) { - if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) { + if (vreg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) { SLJIT_ASSERT(reg_size == 5); if (type & SLJIT_SIMD_LANE_ZERO) { - FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0)); + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0)); return emit_byte(compiler, 0x4e); } - FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0)); + FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0)); return emit_byte(compiler, 1); } @@ -4262,7 +4320,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compile } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_s32 src_lane_index) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4277,7 +4335,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c #endif /* SLJIT_CONFIG_X86_32 */ CHECK_ERROR(); - CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index)); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; @@ -4301,9 +4359,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (reg_size == 5) { if (src_lane_index == 0) - return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0); - FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); byte = U8(byte | (byte << 2)); return emit_byte(compiler, U8(byte | (byte << 4))); @@ -4311,8 +4369,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (src_lane_index == 0) { if (use_vex) - return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, 0); - return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0); + return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, 0); + return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, src, 0); } /* Changes it to SHUFPD_x_xm. 
*/ @@ -4326,9 +4384,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c SLJIT_ASSERT(elem_size == 2); if (src_lane_index == 0) - return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0); - FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); byte = 0x44; if (src_lane_index >= 4) { @@ -4337,15 +4395,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c } FAIL_IF(emit_byte(compiler, byte)); - FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0)); byte = U8(src_lane_index); } else if (use_vex) { - FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0)); } else { - if (freg != src) - FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0)); + if (vreg != src) + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, vreg, src, 0)); - FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0)); + FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, vreg, vreg, 0)); } if (elem_size == 2) { @@ -4362,13 +4420,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (elem_size == 0) { if (reg_size == 5 && src_lane_index >= 16) { - FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | 
VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa)); src_lane_index &= 0x7; - src = freg; + src = vreg; } - if (src_lane_index != 0 || (freg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) { + if (src_lane_index != 0 || (vreg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) { pref = 0; if ((src_lane_index & 0x3) == 0) { @@ -4379,33 +4437,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c byte = U8(src_lane_index >> 1); } else { if (!use_vex) { - if (freg != src) - FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); + if (vreg != src) + FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0)); - FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0)); + FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, vreg, 0)); } else - FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, vreg, src, 0)); FAIL_IF(emit_byte(compiler, U8(src_lane_index))); } if (pref != 0) { if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0)); else - FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0)); FAIL_IF(emit_byte(compiler, byte)); } - src = freg; + src = vreg; } if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2)) - return emit_vex_instruction(compiler, 
VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0); SLJIT_ASSERT(reg_size == 4); FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); - return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); + return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0); } if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) { @@ -4424,7 +4482,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c if (reg_size == 5) pref |= VEX_256; - return emit_vex_instruction(compiler, pref, freg, 0, src, 0); + return emit_vex_instruction(compiler, pref, vreg, 0, src, 0); } if (reg_size == 5) { @@ -4443,22 +4501,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c pref = 0; break; default: - FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); return emit_byte(compiler, U8(src_lane_index == 0 ? 
0x44 : 0xee)); } if (pref != 0) { - FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, pref, vreg, 0, src, 0)); byte = U8(byte | (byte << 2)); FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); if (src_lane_index == 0) - return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0); - src = freg; + src = vreg; } - FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0)); byte = U8(src_lane_index); byte = U8(byte | (byte << 2)); return emit_byte(compiler, U8(byte | (byte << 4))); @@ -4471,16 +4529,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c pref = (src_lane_index & 2) == 0 ? 
EX86_PREF_F2 : EX86_PREF_F3; if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0)); else - FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0)); byte = U8(byte | (byte << 2)); FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2) - return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0); - src = freg; + src = vreg; /* fallthrough */ case 2: byte = U8(src_lane_index); @@ -4493,14 +4551,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_c } if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, src, 0)); else - FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0)); return emit_byte(compiler, U8(byte | (byte << 4))); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 src, sljit_sw srcw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4510,7 +4568,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler sljit_u8 opcode; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); @@ -4533,8 +4591,8 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler return SLJIT_SUCCESS; if (use_vex) - return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, freg, 0, src, srcw); - return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw); + return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, vreg, 0, src, srcw); + return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, vreg, src, srcw); } switch (elem_size) { @@ -4570,12 +4628,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler return SLJIT_SUCCESS; if (use_vex) - return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw); - return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw); + return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, srcw); + return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, src, srcw); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 freg, + sljit_s32 vreg, sljit_s32 dst, sljit_sw dstw) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); @@ -4586,7 +4644,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c sljit_u8 *inst; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); @@ -4607,10 +4665,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c switch (elem_size) { case 1: if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, freg, 0)); + 
FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, vreg, 0)); else - FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0)); - freg = TMP_FREG; + FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, vreg, 0)); + vreg = TMP_FREG; break; case 2: op = EX86_SSE2_OP2; @@ -4621,9 +4679,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x; if (use_vex) - FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0)); else - FAIL_IF(emit_groupf(compiler, op, dst_r, freg, 0)); + FAIL_IF(emit_groupf(compiler, op, dst_r, vreg, 0)); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = type & SLJIT_32; @@ -4650,9 +4708,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; if (elem_size == 1) { - FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0)); + FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0)); FAIL_IF(emit_byte(compiler, 1)); - FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0)); + FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, TMP_FREG, 0)); FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0)); } else { op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2; @@ -4662,7 +4720,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c else if (elem_size == 3) op |= EX86_PREF_66; - FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0)); + FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0)); } if (dst_r == TMP_REG1) { @@ -4676,7 +4734,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *c } static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src_freg) + sljit_s32 dst_vreg, sljit_s32 src_vreg) { sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? 
MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2; @@ -4685,18 +4743,21 @@ static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3) op |= EX86_PREF_66; - return emit_groupf(compiler, op, dst_freg, src_freg, 0); + return emit_groupf(compiler, op, dst_vreg, src_vreg, 0); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) + sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w) { sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); sljit_uw op = 0; + sljit_uw mov_op = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w)); + ADJUST_LOCAL_OFFSET(src2, src2w); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; @@ -4730,27 +4791,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *co if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) op |= EX86_PREF_66; break; + + case SLJIT_SIMD_OP2_SHUFFLE: + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + op = PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38; + break; } if (type & SLJIT_SIMD_TEST) return SLJIT_SUCCESS; - if (reg_size == 5 || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) { + if ((src2 & SLJIT_MEM) && SLJIT_SIMD_GET_ELEM2_SIZE(type) < reg_size) { + mov_op = ((type & SLJIT_SIMD_FLOAT) ? (MOVUPS_x_xm | (elem_size == 3 ? 
EX86_PREF_66 : 0)) : (MOVDQU_x_xm | EX86_PREF_F3)) | EX86_SSE2; + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, mov_op, TMP_FREG, 0, src2, src2w)); + else + FAIL_IF(emit_groupf(compiler, mov_op, TMP_FREG, src2, src2w)); + + src2 = TMP_FREG; + src2w = 0; + } + + if (reg_size == 5 || use_vex) { if (reg_size == 5) op |= VEX_256; - return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0); + return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_vreg, src1_vreg, src2, src2w); } - if (dst_freg != src1_freg) { - if (dst_freg == src2_freg) - src2_freg = src1_freg; - else - FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg)); + if (dst_vreg != src1_vreg) { + if (dst_vreg == src2) { + if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) { + FAIL_IF(emit_simd_mov(compiler, type, TMP_FREG, src2)); + FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg)); + src2 = TMP_FREG; + src2w = 0; + } else + src2 = src1_vreg; + } else + FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg)); } - FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0)); - return SLJIT_SUCCESS; + if (op & (VEX_OP_0F38 | VEX_OP_0F3A)) + return emit_groupf_ext(compiler, op | EX86_SSE2, dst_vreg, src2, src2w); + return emit_groupf(compiler, op | EX86_SSE2, dst_vreg, src2, src2w); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, @@ -4760,8 +4846,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32) + return SLJIT_ERR_UNSUPPORTED; + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + SLJIT_SKIP_CHECKS(compiler); - return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); + 
return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, @@ -4770,8 +4862,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler sljit_s32 temp_reg) { sljit_uw pref; - sljit_s32 free_reg = TMP_REG1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 saved_reg = TMP_REG1; + sljit_s32 swap_tmp = 0; sljit_sw srcw = 0; sljit_sw tempw = 0; #endif /* SLJIT_CONFIG_X86_32 */ @@ -4784,18 +4877,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP)); SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP)); + if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32) + return SLJIT_ERR_UNSUPPORTED; + + if (op & SLJIT_ATOMIC_TEST) + return SLJIT_SUCCESS; + op = GET_OPCODE(op); + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (temp_reg == SLJIT_TMP_DEST_REG) { + FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1])); + + if (src_reg == SLJIT_R0) + src_reg = TMP_REG1; + if (mem_reg == SLJIT_R0) + mem_reg = TMP_REG1; + + temp_reg = SLJIT_R0; + swap_tmp = 1; + } + + /* Src is virtual register or its low byte is not accessible. */ if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) { - /* Src is virtual register or its low byte is not accessible. */ - SLJIT_ASSERT(src_reg != SLJIT_R1); - free_reg = src_reg; + SLJIT_ASSERT(src_reg != SLJIT_R1 && temp_reg != SLJIT_TMP_DEST_REG); + + if (swap_tmp) { + saved_reg = (mem_reg != SLJIT_R1) ? 
SLJIT_R1 : SLJIT_R2; + + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, saved_reg, 0); + EMIT_MOV(compiler, saved_reg, 0, src_reg, srcw); + } else + EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw); - EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw); - src_reg = TMP_REG1; + src_reg = saved_reg; if (mem_reg == src_reg) - mem_reg = TMP_REG1; + mem_reg = saved_reg; } #endif /* SLJIT_CONFIG_X86_32 */ @@ -4803,29 +4921,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; - EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_R0, 0); EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0); if (src_reg == SLJIT_R0) - src_reg = free_reg; + src_reg = TMP_REG2; if (mem_reg == SLJIT_R0) - mem_reg = free_reg; + mem_reg = TMP_REG2; #else /* !SLJIT_CONFIG_X86_64 */ - if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) { - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0); - EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0); - EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); + SLJIT_ASSERT(!swap_tmp); - mem_reg = SLJIT_R1; - free_reg = SLJIT_R1; + if (src_reg == TMP_REG1) { + if (mem_reg == SLJIT_R0) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0); + EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); + + mem_reg = SLJIT_R1; + saved_reg = SLJIT_R1; + } else { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); + saved_reg = SLJIT_R0; + } } else { - EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R0, 0); EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); if (src_reg == SLJIT_R0) - src_reg = free_reg; + src_reg = TMP_REG1; if (mem_reg == SLJIT_R0) - mem_reg = free_reg; + mem_reg = TMP_REG1; } #endif /* SLJIT_CONFIG_X86_64 */ } @@ -4847,14 +4973,25 @@ SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_atomic_store(struct sljit_compiler FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (swap_tmp) { + SLJIT_ASSERT(temp_reg == SLJIT_R0); + FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1])); + + if (saved_reg != TMP_REG1) + return emit_mov(compiler, saved_reg, 0, SLJIT_MEM1(SLJIT_SP), 0); + return SLJIT_SUCCESS; + } +#endif /* SLJIT_CONFIG_X86_32 */ + if (temp_reg != SLJIT_R0) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; - return emit_mov(compiler, SLJIT_R0, 0, TMP_REG1, 0); + return emit_mov(compiler, SLJIT_R0, 0, TMP_REG2, 0); #else /* !SLJIT_CONFIG_X86_64 */ - EMIT_MOV(compiler, SLJIT_R0, 0, free_reg, 0); - if (free_reg != TMP_REG1) - return emit_mov(compiler, free_reg, 0, (free_reg == SLJIT_R1) ? SLJIT_MEM1(SLJIT_SP) : TMP_REG1, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, (saved_reg == SLJIT_R0) ? SLJIT_MEM1(SLJIT_SP) : saved_reg, 0); + if (saved_reg == SLJIT_R1) + return emit_mov(compiler, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_SP), 0); #endif /* SLJIT_CONFIG_X86_64 */ } return SLJIT_SUCCESS; diff --git a/src/sljit/sljitSerialize.c b/deps/sljit/sljit_src/sljitSerialize.c similarity index 100% rename from src/sljit/sljitSerialize.c rename to deps/sljit/sljit_src/sljitSerialize.c diff --git a/src/sljit/sljitUtils.c b/deps/sljit/sljit_src/sljitUtils.c similarity index 100% rename from src/sljit/sljitUtils.c rename to deps/sljit/sljit_src/sljitUtils.c diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt index 851976a..bb687f7 100644 --- a/doc/html/NON-AUTOTOOLS-BUILD.txt +++ b/doc/html/NON-AUTOTOOLS-BUILD.txt @@ -105,6 +105,7 @@ example. pcre2_chkdint.c pcre2_chartables.c pcre2_compile.c + pcre2_compile_class.c pcre2_config.c pcre2_context.c pcre2_convert.c @@ -138,7 +139,7 @@ example. 
Note that you must compile pcre2_jit_compile.c, even if you have not defined SUPPORT_JIT in src/config.h, because when JIT support is not configured, dummy functions are compiled. When JIT support IS configured, - pcre2_jit_compile.c #includes other files from the sljit subdirectory, + pcre2_jit_compile.c #includes other files from the sljit dependency, all of whose names begin with "sljit". It also #includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile those yourself. @@ -301,56 +302,66 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no spaces in the names for your CMake installation and your PCRE2 source and build directories. -The following instructions were contributed by a PCRE1 user, but they should -also work for PCRE2. If they are not followed exactly, errors may occur. In the -event that errors do occur, it is recommended that you delete the CMake cache -before attempting to repeat the CMake build process. In the CMake GUI, the -cache can be deleted by selecting "File > Delete Cache". +If you are using CMake and encounter errors, deleting the CMake cache and +restarting from a fresh build may fix the error. In the CMake GUI, the cache can +be deleted by selecting "File > Delete Cache"; or the file "CMakeCache.txt" can +be deleted. -1. Install the latest CMake version available from http://www.cmake.org/, and - ensure that cmake\bin is on your path. +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. -2. Unzip (retaining folder structure) the PCRE2 source tree into a source - directory such as C:\pcre2. +2. Unzip (retaining folder structure) the PCRE2 source tree into a source + directory such as C:\pcre2.
You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. -3. Create a new, empty build directory, preferably a subdirectory of the - source dir. For example, C:\pcre2\pcre2-xx\build. +3. Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre2\pcre2-xx\build. -4. Run cmake-gui from the Shell environment of your build tool, for example, - Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try - to start Cmake from the Windows Start menu, as this can lead to errors. +4. Run CMake. -5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and - build directories, respectively. + - Using the CLI, simply run `cmake ..` inside the `build/` directory. You can + use the `ccmake` ncurses GUI to select and configure PCRE2 features. -6. Hit the "Configure" button. + - Using the CMake GUI: -7. Select the particular IDE / build tool that you are using (Visual - Studio, MSYS makefiles, MinGW makefiles, etc.) + a) Run cmake-gui from the Shell environment of your build tool, for + example, Msys for Msys/MinGW or Visual Studio Command Prompt for + VC/VC++. -8. The GUI will then list several configuration options. This is where - you can disable Unicode support or select other PCRE2 optional features. + b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and + build directories, respectively. -9. Hit "Configure" again. The adjacent "Generate" button should now be - active. + c) Press the "Configure" button. -10. Hit "Generate". + d) Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) -11. The build directory should now contain a usable build system, be it a - solution file for Visual Studio, makefiles for MinGW, etc. Exit from - cmake-gui and use the generated build system with your compiler or IDE. 
- E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 - solution, select the desired configuration (Debug, or Release, etc.) and - build the ALL_BUILD project. + e) The GUI will then list several configuration options. This is where + you can disable Unicode support or select other PCRE2 optional features. -12. If during configuration with cmake-gui you've elected to build the test - programs, you can execute them by building the test project. E.g., for - MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The - most recent build configuration is targeted by the tests. A summary of - test results is presented. Complete test output is subsequently - available for review in Testing\Temporary under your build dir. + f) Press "Configure" again. The adjacent "Generate" button should now be + active. + + g) Press "Generate". + +5. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + + Regardless of build system used, `cmake --build .` will build it. + +6. If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + Regardless of build system used, `ctest` will run the tests. BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO @@ -425,6 +436,7 @@ OpenVMS. They are in the "vms" directory in the distribution tarball. 
Please read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep programs contain some VMS-specific code. -=========================== -Last Updated: 16 April 2024 -=========================== +============================== +Last updated: 26 December 2024 +============================== + diff --git a/doc/html/README.txt b/doc/html/README.txt index dab5e94..5a50f7f 100644 --- a/doc/html/README.txt +++ b/doc/html/README.txt @@ -385,7 +385,7 @@ library. They are also documented in the pcre2build man page. If this is done, when pcre2test's input is from a terminal, it reads it using the readline() function. This provides line-editing and history facilities. - Note that libreadline is GPL-licenced, so if you distribute a binary of + Note that libreadline is GPL-licensed, so if you distribute a binary of pcre2test linked in this way, there may be licensing issues. These can be avoided by linking with libedit (which has a BSD licence) instead. @@ -411,20 +411,19 @@ library. They are also documented in the pcre2build man page. Instead of %td or %zu, %lu is used, with a cast for size_t values. . There is a special option called --enable-fuzz-support for use by people who - want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit - library. If set, it causes an extra library called libpcre2-fuzzsupport.a to - be built, but not installed. This contains a single function called - LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the - length of the string. When called, this function tries to compile the string - as a pattern, and if that succeeds, to match it. This is done both with no - options and with some random options bits that are generated from the string. - Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to - be created. This is normally run under valgrind or used when PCRE2 is - compiled with address sanitizing enabled. 
It calls the fuzzing function and - outputs information about what it is doing. The input strings are specified - by arguments: if an argument starts with "=" the rest of it is a literal - input string. Otherwise, it is assumed to be a file name, and the contents - of the file are the test string. + want to run fuzzing tests on PCRE2. If set, it causes an extra library + called libpcre2-fuzzsupport.a to be built, but not installed. This contains + a single function called LLVMFuzzerTestOneInput() whose arguments are a + pointer to a string and the length of the string. When called, this function + tries to compile the string as a pattern, and if that succeeds, to match + it. This is done both with no options and with some random options bits that + are generated from the string. Setting --enable-fuzz-support also causes an + executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally + run under valgrind or used when PCRE2 is compiled with address sanitizing + enabled. It calls the fuzzing function and outputs information about what it + is doing. The input strings are specified by arguments: if an argument + starts with "=" the rest of it is a literal input string. Otherwise, it is + assumed to be a file name, and the contents of the file are the test string. . Releases before 10.30 could be compiled with --disable-stack-for-recursion, which caused pcre2_match() to use individual blocks on the heap for @@ -510,6 +509,7 @@ system. The following are installed (file names are all relative to the LICENCE NEWS README + SECURITY pcre2.txt (a concatenation of the man(3) pages) pcre2test.txt the pcre2test man page pcre2grep.txt the pcre2grep man page @@ -607,8 +607,9 @@ zip formats. The command "make distcheck" does the same, but then does a trial build of the new distribution to ensure that it works. If you have modified any of the man page sources in the doc directory, you -should first run the PrepareRelease script before making a distribution. 
This -script creates the .txt and HTML forms of the documentation from the man pages. +should first run the maint/PrepareRelease script before making a distribution. +This script creates the .txt and HTML forms of the documentation from the man +pages. Testing PCRE2 @@ -822,37 +823,38 @@ The distribution should contain the files listed below. ASCII coding; unless --enable-rebuild-chartables is specified, used by copying to pcre2_chartables.c - src/pcre2posix.c ) - src/pcre2_auto_possess.c ) - src/pcre2_chkdint.c ) - src/pcre2_compile.c ) - src/pcre2_config.c ) - src/pcre2_context.c ) - src/pcre2_convert.c ) - src/pcre2_dfa_match.c ) - src/pcre2_error.c ) - src/pcre2_extuni.c ) - src/pcre2_find_bracket.c ) - src/pcre2_jit_compile.c ) - src/pcre2_jit_match.c ) sources for the functions in the library, - src/pcre2_jit_misc.c ) and some internal functions that they use - src/pcre2_maketables.c ) - src/pcre2_match.c ) - src/pcre2_match_data.c ) - src/pcre2_newline.c ) - src/pcre2_ord2utf.c ) - src/pcre2_pattern_info.c ) - src/pcre2_script_run.c ) - src/pcre2_serialize.c ) - src/pcre2_string_utils.c ) - src/pcre2_study.c ) - src/pcre2_substitute.c ) - src/pcre2_substring.c ) - src/pcre2_tables.c ) - src/pcre2_ucd.c ) - src/pcre2_ucptables.c ) - src/pcre2_valid_utf.c ) - src/pcre2_xclass.c ) + src/pcre2posix.c ) + src/pcre2_auto_possess.c ) + src/pcre2_chkdint.c ) + src/pcre2_compile.c ) + src/pcre2_compile_class.c ) + src/pcre2_config.c ) + src/pcre2_context.c ) + src/pcre2_convert.c ) + src/pcre2_dfa_match.c ) + src/pcre2_error.c ) + src/pcre2_extuni.c ) + src/pcre2_find_bracket.c ) + src/pcre2_jit_compile.c ) + src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_misc.c ) and some internal functions that they use + src/pcre2_maketables.c ) + src/pcre2_match.c ) + src/pcre2_match_data.c ) + src/pcre2_newline.c ) + src/pcre2_ord2utf.c ) + src/pcre2_pattern_info.c ) + src/pcre2_script_run.c ) + src/pcre2_serialize.c ) + 
src/pcre2_string_utils.c ) + src/pcre2_study.c ) + src/pcre2_substitute.c ) + src/pcre2_substring.c ) + src/pcre2_tables.c ) + src/pcre2_ucd.c ) + src/pcre2_ucptables.c ) + src/pcre2_valid_utf.c ) + src/pcre2_xclass.c ) src/pcre2_printint.c debugging function that is used by pcre2test, src/pcre2_fuzzsupport.c function for (optional) fuzzing support @@ -860,13 +862,16 @@ The distribution should contain the files listed below. src/config.h.in template for config.h, when built by "configure" src/pcre2.h.in template for pcre2.h when built by "configure" src/pcre2posix.h header for the external POSIX wrapper API + src/pcre2_compile.h header for internal use src/pcre2_internal.h header for internal use src/pcre2_intmodedep.h a mode-specific internal header + src/pcre2_jit_char_inc.h header used by JIT src/pcre2_jit_neon_inc.h header used by JIT src/pcre2_jit_simd_inc.h header used by JIT src/pcre2_ucp.h header for Unicode property handling + src/pcre2_util.h header for internal utils - sljit/* source files for the JIT compiler + deps/sljit/sljit_src/* source files for the JIT compiler (B) Source files for programs that use PCRE2: @@ -878,48 +883,49 @@ The distribution should contain the files listed below. 
(C) Auxiliary files: - 132html script to turn "man" pages into HTML - AUTHORS information about the author of PCRE2 + AUTHORS.md information about the authors of PCRE2 ChangeLog log of changes to the code - CleanTxt script to clean nroff output for txt man pages - Detrail script to remove trailing spaces HACKING some notes about the internals of PCRE2 INSTALL generic installation instructions - LICENCE conditions for the use of PCRE2 + LICENCE.md conditions for the use of PCRE2 COPYING the same, using GNU's standard name + SECURITY.md information on reporting vulnerabilities Makefile.in ) template for Unix Makefile, which is built by ) "configure" Makefile.am ) the automake input that was used to create ) Makefile.in NEWS important changes in this release NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools - PrepareRelease script to make preparations for "make dist" README this file RunTest a Unix shell script for running tests RunGrepTest a Unix shell script for pcre2grep tests + RunTest.bat a Windows batch file for running tests + RunGrepTest.bat a Windows batch file for pcre2grep tests aclocal.m4 m4 macros (generated by "aclocal") - config.guess ) files used by libtool, - config.sub ) used only when building a shared library + m4/* m4 macros (used by autoconf) configure a configuring shell script (built by autoconf) configure.ac ) the autoconf input that was used to build ) "configure" and config.h - depcomp ) script to find program dependencies, generated by - ) automake doc/*.3 man page sources for PCRE2 doc/*.1 man page sources for pcre2grep and pcre2test - doc/index.html.src the base HTML page doc/html/* HTML documentation doc/pcre2.txt plain text version of the man pages + doc/pcre2-config.txt plain text documentation of pcre2-config script + doc/pcre2grep.txt plain text documentation of grep utility program doc/pcre2test.txt plain text documentation of test program - install-sh a shell script for installing files libpcre2-8.pc.in template 
for libpcre2-8.pc for pkg-config libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config - ltmain.sh file used to build a libtool script - missing ) common stub for a few missing GNU programs while - ) installing, generated by automake - mkinstalldirs script for making install directories + ar-lib ) + config.guess ) + config.sub ) + depcomp ) helper tools generated by libtool and + compile ) automake, used internally by ./configure + install-sh ) + ltmain.sh ) + missing ) + test-driver ) perltest.sh Script for running a Perl test program pcre2-config.in source of script which retains PCRE2 information testdata/testinput* test data for main library tests @@ -927,12 +933,13 @@ The distribution should contain the files listed below. testdata/grep* input and output for pcre2grep tests testdata/* other supporting test files -(D) Auxiliary files for cmake support +(D) Auxiliary files for CMake support cmake/COPYING-CMAKE-SCRIPTS - cmake/FindPackageHandleStandardArgs.cmake cmake/FindEditline.cmake cmake/FindReadline.cmake + cmake/pcre2-config-version.cmake.in + cmake/pcre2-config.cmake.in CMakeLists.txt config-cmake.h.in @@ -943,14 +950,21 @@ The distribution should contain the files listed below. src/config.h.generic ) a version of config.h for use in non-"configure" ) environments -(F) Auxiliary files for building PCRE2 under OpenVMS +(F) Auxiliary files for building PCRE2 using other build systems + + BUILD.bazel ) + MODULE.bazel ) files used by the Bazel build system + WORKSPACE.bazel ) + build.zig file used by zig's build system + +(G) Auxiliary files for building PCRE2 under OpenVMS vms/configure.com ) vms/openvms_readme.txt ) These files were contributed by a PCRE2 user. 
vms/pcre2.h_patch ) vms/stdint.h ) -Philip Hazel -Email local part: Philip.Hazel -Email domain: gmail.com -Last updated: 15 April 2024 +============================== +Last updated: 18 December 2024 +============================== + diff --git a/doc/html/index.html b/doc/html/index.html index e4dc786..2d81b67 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -267,6 +267,9 @@ in the library. pcre2_set_offset_limit   Set the offset limit +pcre2_set_optimize +   Set an optimization directive + pcre2_set_parens_nest_limit   Set the parentheses nesting limit @@ -276,6 +279,12 @@ in the library. pcre2_set_recursion_memory_management   Obsolete function that (from 10.30 onwards) does nothing +pcre2_set_substitute_callout +   Set a substitution callout function + +pcre2_set_substitute_case_callout +   Set a substitution case callout function + pcre2_substitute   Match a compiled pattern to a subject string and do substitutions diff --git a/doc/html/pcre2.html b/doc/html/pcre2.html index 4cb83dc..e72b6b1 100644 --- a/doc/html/pcre2.html +++ b/doc/html/pcre2.html @@ -16,7 +16,7 @@ please consult the man page, in case the conversion went wrong.
  • INTRODUCTION
  • SECURITY CONSIDERATIONS
  • USER DOCUMENTATION -
  • AUTHOR +
  • AUTHORS
  • REVISION
    INTRODUCTION
    @@ -190,22 +190,22 @@ listing), and the short pages for individual functions, are concatenated in In the "man" and HTML formats, there is also a short page for each C library function, listing its arguments and results.

    -
    AUTHOR
    +
    AUTHORS

    -Philip Hazel -
    -Retired from University Computing Service -
    -Cambridge, England. -
    +The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg. +

    +

    +PCRE2 was written by Philip Hazel, of the University Computing Service, +Cambridge, England. Many others have also contributed.

    -Putting an actual email address here is a spam magnet. If you want to email me, -use my two names separated by a dot at gmail.com. +To contact the maintainers, please use the GitHub issues tracker or PCRE2 +mailing list, as described at the project page: +https://github.com/PCRE2Project/pcre2


    REVISION

    -Last updated: 27 August 2021 +Last updated: 18 December 2024
    Copyright © 1997-2021 University of Cambridge.
    diff --git a/doc/html/pcre2_compile.html b/doc/html/pcre2_compile.html index f0080ea..ee933f3 100644 --- a/doc/html/pcre2_compile.html +++ b/doc/html/pcre2_compile.html @@ -57,6 +57,7 @@ The primary option bits are: PCRE2_ALLOW_EMPTY_CLASS Allow empty classes PCRE2_ALT_BSUX Alternative handling of \u, \U, and \x PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode + PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax PCRE2_ALT_VERBNAMES Process backslashes in verb names PCRE2_AUTO_CALLOUT Compile automatic callouts PCRE2_CASELESS Do caseless matching diff --git a/doc/html/pcre2_jit_compile.html b/doc/html/pcre2_jit_compile.html index 873d0dd..791dd0c 100644 --- a/doc/html/pcre2_jit_compile.html +++ b/doc/html/pcre2_jit_compile.html @@ -33,9 +33,18 @@ details are given in the documentation.

    -The first argument is a pointer that was returned by a successful call to -pcre2_compile(), and the second must contain one or more of the following -bits: +The availability of JIT support can be tested by calling +pcre2_jit_compile() with a single option PCRE2_JIT_TEST_ALLOC (the +code argument is ignored, so a NULL value is accepted). Such a call +returns zero if JIT is available and has a working allocator. Otherwise +it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate +executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not +compiled. +

    +

    +Otherwise, the first argument must be a pointer that was returned by a +successful call to pcre2_compile(), and the second must contain one or +more of the following bits:

       PCRE2_JIT_COMPLETE      compile code for full matching
       PCRE2_JIT_PARTIAL_SOFT  compile code for soft partial matching
    @@ -46,11 +55,13 @@ superseded by the pcre2_compile() option PCRE2_MATCH_INVALID_UTF. The old
     option is deprecated and may be removed in the future.
     

    -The yield of the function is 0 for success, or a negative error code otherwise. -In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or -if an unknown bit is set in options. The function can also return -PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the -compiler, even if it was because of a system security restriction. +The yield of the function when called with any of the three options above is 0 +for success, or a negative error code otherwise. In particular, +PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown +bit is set in options. The function can also return PCRE2_ERROR_NOMEMORY +if JIT is unable to allocate executable memory for the compiler, even if it was +because of a system security restriction. In a few cases, the function may +return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features.

    There is a complete description of the PCRE2 native API in the diff --git a/doc/html/pcre2_set_compile_extra_options.html b/doc/html/pcre2_set_compile_extra_options.html index 4924ed7..cb62022 100644 --- a/doc/html/pcre2_set_compile_extra_options.html +++ b/doc/html/pcre2_set_compile_extra_options.html @@ -43,6 +43,10 @@ options are: PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines PCRE2_EXTRA_MATCH_WORD Pattern matches "words" + PCRE2_EXTRA_NEVER_CALLOUT Disallow callouts in pattern + PCRE2_EXTRA_NO_BS0 Disallow \0 (but not \00 or \000) + PCRE2_EXTRA_PYTHON_OCTAL Use Python rules for octal + PCRE2_EXTRA_TURKISH_CASING Use Turkish I case folding

    There is a complete description of the PCRE2 native API in the pcre2api diff --git a/doc/html/pcre2_set_max_pattern_compiled_length.html b/doc/html/pcre2_set_max_pattern_compiled_length.html index ab570cf..a40f41e 100644 --- a/doc/html/pcre2_set_max_pattern_compiled_length.html +++ b/doc/html/pcre2_set_max_pattern_compiled_length.html @@ -27,9 +27,9 @@ DESCRIPTION

    This function sets, in a compile context, the maximum size (in bytes) for the -memory needed to hold the compiled version of a pattern that is compiled with -this context. The result is always zero. If a pattern that is passed to -pcre2_compile() with this context needs more memory, an error is +memory needed to hold the compiled version of a pattern that is using this +context. The result is always zero. If a pattern that is passed to +pcre2_compile() referencing this context needs more memory, an error is generated. The default is the largest number that a PCRE2_SIZE variable can hold, which is effectively unlimited.

    diff --git a/doc/html/pcre2_set_optimize.html b/doc/html/pcre2_set_optimize.html new file mode 100644 index 0000000..47caeb2 --- /dev/null +++ b/doc/html/pcre2_set_optimize.html @@ -0,0 +1,57 @@ + + +pcre2_set_optimize specification + + +

    pcre2_set_optimize man page

    +

    +Return to the PCRE2 index page. +

    +

    +This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
    +
    +SYNOPSIS +
    +

    +#include <pcre2.h> +

    +

    +int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); +

    +
    +DESCRIPTION +
    +

    +This function controls which performance optimizations will be applied +by pcre2_compile(). It can be called multiple times with the same compile +context; the effects are cumulative, with the effects of later calls taking +precedence over earlier ones. +

    +

    +The result is zero for success, PCRE2_ERROR_NULL if ccontext is NULL, +or PCRE2_ERROR_BADOPTION if directive is unknown. The latter could be +useful to detect if a certain optimization is available. +

    +

    +The possible values for the directive parameter are: +

    +  PCRE2_OPTIMIZATION_FULL   Enable all optimizations (default)
    +  PCRE2_OPTIMIZATION_NONE   Disable all optimizations
    +  PCRE2_AUTO_POSSESS        Enable auto-possessification
    +  PCRE2_AUTO_POSSESS_OFF    Disable auto-possessification
    +  PCRE2_DOTSTAR_ANCHOR      Enable implicit dotstar anchoring
    +  PCRE2_DOTSTAR_ANCHOR_OFF  Disable implicit dotstar anchoring
    +  PCRE2_START_OPTIMIZE      Enable start-up optimizations at match time
    +  PCRE2_START_OPTIMIZE_OFF  Disable start-up optimizations at match time
    +
    +There is a complete description of the PCRE2 native API, including detailed +descriptions of the directive parameter values in the +pcre2api +page. +

    +Return to the PCRE2 index page. +

    diff --git a/doc/html/pcre2_set_substitute_callout.html b/doc/html/pcre2_set_substitute_callout.html index 7ae3a39..8640728 100644 --- a/doc/html/pcre2_set_substitute_callout.html +++ b/doc/html/pcre2_set_substitute_callout.html @@ -20,7 +20,7 @@ SYNOPSIS

    int pcre2_set_substitute_callout(pcre2_match_context *mcontext, - int (*callout_function)(pcre2_substitute_callout_block *), + int (*callout_function)(pcre2_substitute_callout_block *, void *), void *callout_data);


    diff --git a/doc/html/pcre2_set_substitute_case_callout.html b/doc/html/pcre2_set_substitute_case_callout.html new file mode 100644 index 0000000..ab50687 --- /dev/null +++ b/doc/html/pcre2_set_substitute_case_callout.html @@ -0,0 +1,45 @@ + + +pcre2_set_substitute_case_callout specification + + +

    pcre2_set_substitute_case_callout man page

    +

    +Return to the PCRE2 index page. +

    +

    +This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
    +
    +SYNOPSIS +
    +

    +#include <pcre2.h> +

    +

    +int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +

    +
    +DESCRIPTION +
    +

    +This function sets the substitute case callout fields in a match context (the +first argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +

    +

    +There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

    +Return to the PCRE2 index page. +

    diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html index 6b60ee9..079cf17 100644 --- a/doc/html/pcre2api.html +++ b/doc/html/pcre2api.html @@ -179,6 +179,10 @@ document for an overview of all the PCRE2 documentation.
    int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, int (*guard_function)(uint32_t, void *), void *user_data); +
    +
    +int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive);


    PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS

    @@ -203,6 +207,13 @@ document for an overview of all the PCRE2 documentation. void *callout_data);

    +int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +
    +
    int pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE value);
    @@ -808,6 +819,7 @@ following compile-time parameters: The compile time nested parentheses limit The maximum length of the pattern string The extra options bits (none set by default) + Which performance optimizations the compiler should apply A compile context is also required if you are using custom memory management. If none of these apply, just pass NULL as the context argument of @@ -952,6 +964,110 @@ The first argument to the callout function gives the current depth of nesting, and the second is user data that is set up by the last argument of pcre2_set_compile_recursion_guard(). The callout function should return zero if all is well, or non-zero to force an error. +
    +
    +int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); +
    +
    +PCRE2 can apply various performance optimizations during compilation, in order +to make matching faster. For example, the compiler might convert some regex +constructs into an equivalent construct which pcre2_match() can execute +faster. By default, all available optimizations are enabled. However, in rare +cases, one might wish to disable specific optimizations. For example, if it is +known that some optimizations cannot benefit a certain regex, it might be +desirable to disable them, in order to speed up compilation. +

    +

    +The permitted values of directive are as follows: +

    +  PCRE2_OPTIMIZATION_FULL
    +
    +Enable all optional performance optimizations. This is the default value. +
    +  PCRE2_OPTIMIZATION_NONE
    +
    +Disable all optional performance optimizations. +
    +  PCRE2_AUTO_POSSESS
    +  PCRE2_AUTO_POSSESS_OFF
    +
    +Enable/disable "auto-possessification" of variable quantifiers such as * and +. +This optimization, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +disable this optimization if you want the matching functions to do a full, +unoptimized search and run all the callouts. +
    +  PCRE2_DOTSTAR_ANCHOR
    +  PCRE2_DOTSTAR_ANCHOR_OFF
    +
    +Enable/disable an optimization that is applied when .* is the first significant +item in a top-level branch of a pattern, and all the other branches also start +with .* or with \A or \G or ^. Such a pattern is automatically anchored if +PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set for any +^ items. Otherwise, the fact that any match must start either at the start of +the subject or following a newline is remembered. Like other optimizations, +this can cause callouts to be skipped. +

    +

    +Dotstar anchor optimization is automatically disabled for .* if it is inside an +atomic group or a capture group that is the subject of a backreference, or if +the pattern contains (*PRUNE) or (*SKIP). +

    +  PCRE2_START_OPTIMIZE
    +  PCRE2_START_OPTIMIZE_OFF
    +
    +Enable/disable optimizations which cause matching functions to scan the subject +string for specific code unit values before attempting a match. For example, if +it is known that an unanchored match must start with a specific value, the +matching code searches the subject for that value, and fails immediately if it +cannot find it, without actually running the main matching function. This means +that a special item such as (*COMMIT) at the start of a pattern is not +considered until after a suitable starting point for the match has been found. +Also, when callouts or (*MARK) items are in use, these "start-up" optimizations +can cause them to be skipped if the pattern is never actually used. The start-up +optimizations are in effect a pre-scan of the subject that takes place before +the pattern is run. +

    +

    +Disabling start-up optimizations ensures that in cases where the result is "no +match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) are +considered at every possible starting position in the subject string. +

    +

    +Disabling start-up optimizations may change the outcome of a matching operation. +Consider the pattern +

    +  (*COMMIT)ABC
    +
    +When this is compiled, PCRE2 records the fact that a match must start with the +character "A". Suppose the subject string is "DEFABC". The start-up +optimization scans along the subject, finds "A" and runs the first match +attempt from there. The (*COMMIT) item means that the pattern must match the +current starting position, which in this case, it does. However, if the same +match is run without start-up optimizations, the initial scan along the subject +string does not happen. The first match attempt is run starting from "D" and +when this fails, (*COMMIT) prevents any further matches being tried, so the +overall result is "no match". +

    +

    +Another start-up optimization makes use of a minimum length for a matching +subject, which is recorded when possible. Consider the pattern +

    +  (*MARK:1)B(*MARK:2)(X|Y)
    +
    +The minimum length for a match is two characters. If the subject is "XXBB", the +"starting character" optimization skips "XX", then tries to match "BB", which +is long enough. In the process, (*MARK:2) is encountered and remembered. When +the match attempt fails, the next "B" is found, but there is only one character +left, so there are no more attempts, and "no match" is returned with the "last +mark seen" set to "2". Without start-up optimizations, however, matches are +tried at every possible starting position, including at the end of the subject, +where (*MARK:1) is encountered, but there is no "B", so the "last mark seen" +that is returned is "1". In this case, the optimizations do not affect the +overall match result, which is still "no match", but they do affect the +auxiliary information that is returned.


    The match context @@ -1011,6 +1127,19 @@ made by pcre2_substitute(). Details are given in the section entitled below.

    +int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +
    +
    +This sets up a callout function for PCRE2 to call when performing case +transformations inside pcre2_substitute(). Details are given in the +section entitled "Creating a new string with substitutions" +below. +
    +
    int pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE value);
    @@ -1228,7 +1357,10 @@ for the amount of heap memory used by pcre2_match() or The output is a uint32_t integer that is set to one if support for just-in-time compiling is included in the library; otherwise it is set to zero. Note that having the support in the library does not guarantee that JIT will be used for -any given match. See the +any given match, and neither does it guarantee that JIT will actually be able +to function, because it may not be able to allocate executable memory in some +environments. There is a special call to pcre2_jit_compile() that can be +used to check this. See the pcre2jit documentation for more details.
    @@ -1431,7 +1563,7 @@ respectively, when pcre2_compile() returns NULL because a compilation
     error has occurred.
     

    -There are nearly 100 positive error codes that pcre2_compile() may return +There are over 100 positive error codes that pcre2_compile() may return if it finds an error in the pattern. There are also some negative error codes that are used for invalid UTF strings when validity checking is in force. These are the same as given by pcre2_match() and pcre2_dfa_match(), and @@ -1539,6 +1671,16 @@ after any internal newline. However, it does not match after a newline at the end of the subject, for compatibility with Perl. If you want a multiline circumflex also to match after a terminating newline, you must set PCRE2_ALT_CIRCUMFLEX. +

    +  PCRE2_ALT_EXTENDED_CLASS
    +
    +Alters the parsing of character classes to follow the extended syntax +described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no impact +on the behaviour of the Perl-specific "(?[...])" syntax for extended classes, +but instead enables the alternative syntax of extended class behaviour inside +ordinary "[...]" character classes. See the +pcre2pattern +documentation for details of the character classes supported.
       PCRE2_ALT_VERBNAMES
     
    @@ -1569,16 +1711,31 @@ letters in the subject. It is equivalent to Perl's /i option, and it can be changed within a pattern by a (?i) option setting. If either PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all characters with more than one other case, and for all characters whose code points are greater than -U+007F. Note that there are two ASCII characters, K and S, that, in addition to +U+007F. +

    +

    +Note that there are two ASCII characters, K and S, that, in addition to their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin sign) and U+017F (long S) respectively. If you do not want this case equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT.

    +One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. +

    +

    For lower valued characters with only one other case, a lookup table is used for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used for all code points less than 256, and higher code points (available only in 16-bit or 32-bit mode) are treated as not having another case. +

    +

    +From release 10.45 PCRE2_CASELESS also affects what some of the letter-related +Unicode property escapes (\p and \P) match. The properties Lu (upper case +letter), Ll (lower case letter), and Lt (title case letter) are all treated as +LC (cased letter) when PCRE2_CASELESS is set.

       PCRE2_DOLLAR_ENDONLY
     
    @@ -1775,7 +1932,7 @@ This option locks out the use of Unicode properties for handling \B, \b, \D, for the PCRE2_UCP option below. In particular, it prevents the creator of the pattern from enabling this facility by starting the pattern with (*UCP). This option may be useful in applications that process patterns from external -sources. The option combination PCRE_UCP and PCRE_NEVER_UCP causes an error. +sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
       PCRE2_NEVER_UTF
     
    @@ -1798,85 +1955,57 @@ though the reference can be by name or by number.
       PCRE2_NO_AUTO_POSSESS
     
    -If this option is set, it disables "auto-possessification", which is an -optimization that, for example, turns a+b into a++b in order to avoid +If this (deprecated) option is set, it disables "auto-possessification", which +is an optimization that, for example, turns a+b into a++b in order to avoid backtracks into a+ that can never be successful. However, if callouts are in use, auto-possessification means that some callouts are never taken. You can set this option if you want the matching functions to do a full unoptimized search and run all the callouts, but it is mainly provided for testing purposes. +

    +

    +If a compile context is available, it is recommended to use +pcre2_set_optimize() with the directive PCRE2_AUTO_POSSESS_OFF rather +than the compile option PCRE2_NO_AUTO_POSSESS. Note that PCRE2_NO_AUTO_POSSESS +takes precedence over the pcre2_set_optimize() optimization directives +PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF.

       PCRE2_NO_DOTSTAR_ANCHOR
     
    -If this option is set, it disables an optimization that is applied when .* is -the first significant item in a top-level branch of a pattern, and all the -other branches also start with .* or with \A or \G or ^. The optimization is -automatically disabled for .* if it is inside an atomic group or a capture -group that is the subject of a backreference, or if the pattern contains -(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is -automatically anchored if PCRE2_DOTALL is set for all the .* items and -PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match -must start either at the start of the subject or following a newline is +If this (deprecated) option is set, it disables an optimization that is applied +when .* is the first significant item in a top-level branch of a pattern, and +all the other branches also start with .* or with \A or \G or ^. The +optimization is automatically disabled for .* if it is inside an atomic group +or a capture group that is the subject of a backreference, or if the pattern +contains (*PRUNE) or (*SKIP). When the optimization is not disabled, such a +pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items +and PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any +match must start either at the start of the subject or following a newline is remembered. Like other optimizations, this can cause callouts to be skipped. +(If a compile context is available, it is recommended to use +pcre2_set_optimize() with the directive PCRE2_DOTSTAR_ANCHOR_OFF +instead.)
       PCRE2_NO_START_OPTIMIZE
     
    This is an option whose main effect is at matching time. It does not change what pcre2_compile() generates, but it does affect the output of the JIT -compiler. +compiler. Setting this option is equivalent to calling pcre2_set_optimize() +with the directive parameter set to PCRE2_START_OPTIMIZE_OFF.

    There are a number of optimizations that may occur at the start of a match, in order to speed up the process. For example, if it is known that an unanchored match must start with a specific code unit value, the matching code searches the subject for that value, and fails immediately if it cannot find it, without -actually running the main matching function. This means that a special item -such as (*COMMIT) at the start of a pattern is not considered until after a -suitable starting point for the match has been found. Also, when callouts or -(*MARK) items are in use, these "start-up" optimizations can cause them to be -skipped if the pattern is never actually used. The start-up optimizations are +actually running the main matching function. The start-up optimizations are in effect a pre-scan of the subject that takes place before the pattern is run.

    -The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations, -possibly causing performance to suffer, but ensuring that in cases where the -result is "no match", the callouts do occur, and that items such as (*COMMIT) -and (*MARK) are considered at every possible starting position in the subject -string. -

    -

    -Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation. -Consider the pattern -

    -  (*COMMIT)ABC
    -
    -When this is compiled, PCRE2 records the fact that a match must start with the -character "A". Suppose the subject string is "DEFABC". The start-up -optimization scans along the subject, finds "A" and runs the first match -attempt from there. The (*COMMIT) item means that the pattern must match the -current starting position, which in this case, it does. However, if the same -match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the -subject string does not happen. The first match attempt is run starting from -"D" and when this fails, (*COMMIT) prevents any further matches being tried, so -the overall result is "no match". -

    -

    -As another start-up optimization makes use of a minimum length for a matching -subject, which is recorded when possible. Consider the pattern -

    -  (*MARK:1)B(*MARK:2)(X|Y)
    -
    -The minimum length for a match is two characters. If the subject is "XXBB", the -"starting character" optimization skips "XX", then tries to match "BB", which -is long enough. In the process, (*MARK:2) is encountered and remembered. When -the match attempt fails, the next "B" is found, but there is only one character -left, so there are no more attempts, and "no match" is returned with the "last -mark seen" set to "2". If NO_START_OPTIMIZE is set, however, matches are tried -at every possible starting position, including at the end of the subject, where -(*MARK:1) is encountered, but there is no "B", so the "last mark seen" that is -returned is "1". In this case, the optimizations do not affect the overall -match result, which is still "no match", but they do affect the auxiliary -information that is returned. +Disabling the start-up optimizations may cause performance to suffer. However, +this may be desirable for patterns which contain callouts or items such as +(*COMMIT) and (*MARK). See the above description of PCRE2_START_OPTIMIZE_OFF +for further details.
       PCRE2_NO_UTF_CHECK
     
    @@ -1931,9 +2060,16 @@ The second effect of PCRE2_UCP is to force the use of Unicode properties for upper/lower casing operations, even when PCRE2_UTF is not set. This makes it possible to process strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has been compiled with Unicode support (which is the default). -The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless +

    +

    +The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless matching such that ASCII characters match only ASCII characters and non-ASCII -characters match only non-ASCII characters. +characters match only non-ASCII characters. The PCRE2_EXTRA_TURKISH_CASING option +(see above) alters the matching of the 'i' characters to follow their behaviour +in Turkish and Azeri languages. For further details on +PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EXTRA_TURKISH_CASING, see the +pcre2unicode +page.

       PCRE2_UNGREEDY
     
    @@ -2070,7 +2206,8 @@ characters. The ASCII letter S is case-equivalent to U+017f (long S) and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a caseless match, both characters must either be ASCII or non-ASCII. The option -can be changed with a pattern by the (?r) option setting. +can be changed within a pattern by the (*CASELESS_RESTRICT) or (?r) option +settings.
       PCRE2_EXTRA_ESCAPED_CR_IS_LF
     
    @@ -2097,6 +2234,34 @@ and the end. This is achieved by automatically inserting the code for "\b(?:" at the start of the compiled pattern and ")\b" at the end. The option may be used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is also set. +
    +  PCRE2_EXTRA_NO_BS0
    +
    +If this option is set (note that its final character is the digit 0) it locks +out the use of the sequence \0 unless at least one more octal digit follows. +
    +  PCRE2_EXTRA_PYTHON_OCTAL
    +
    +If this option is set, PCRE2 follows Python's rules for interpreting octal +escape sequences. The rules for handling sequences such as \14, which could +be an octal number or a back reference, are different. Details are given in the +pcre2pattern +documentation. +
    +  PCRE2_EXTRA_NEVER_CALLOUT
    +
    +If this option is set, PCRE2 treats callouts in the pattern as a syntax error, +returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if the application +knows that a callout will not be provided to pcre2_match(), so that +callouts in the pattern are not silently ignored. +
    +  PCRE2_EXTRA_TURKISH_CASING
    +
    +This option alters case-equivalence of the 'i' letters to follow the +alphabet used by Turkish and Azeri languages. The option can be changed within +a pattern by the (*TURKISH_CASING) start-of-pattern setting. Either the UTF or +UCP options must be set. In the 8-bit library, UTF must be set. This option +cannot be combined with PCRE2_EXTRA_CASELESS_RESTRICT.


    JUST-IN-TIME (JIT) COMPILATION

    @@ -2303,6 +2468,7 @@ following are true: PCRE2_DOTALL is in force for .* Neither (*PRUNE) nor (*SKIP) appears in the pattern PCRE2_NO_DOTSTAR_ANCHOR is not set + Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF

    For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the options returned for PCRE2_INFO_ALLOPTIONS. @@ -3646,9 +3812,10 @@ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If this option is set, however, pcre2_substitute() continues to go through the motions of matching and substituting (without, of course, writing anything) -in order to compute the size of buffer that is needed. This value is passed -back via the outlengthptr variable, with the result of the function still -being PCRE2_ERROR_NOMEMORY. +in order to compute the size of buffer that is needed, which will include the +extra space for the terminating NUL. This value is passed back via the +outlengthptr variable, with the result of the function still being +PCRE2_ERROR_NOMEMORY.

    Passing a buffer size of zero is a permitted way of finding out how much memory @@ -3667,18 +3834,26 @@ If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted in any way. By default, however, a dollar character is an escape character that can specify the insertion of characters from capture groups and names from (*MARK) or other control verbs in the pattern. Dollar is the only escape -character (backslash is treated as literal). The following forms are always +character (backslash is treated as literal). The following forms are recognized:

       $$                  insert a dollar character
    -  $<n> or ${<n>}      insert the contents of group <n>
    +  $n or ${n}          insert the contents of group n
    +  $0 or $&            insert the entire matched substring
    +  $`                  insert the substring that precedes the match
    +  $'                  insert the substring that follows the match
    +  $_                  insert the entire input string
       $*MARK or ${*MARK}  insert a control verb name
     
    -Either a group number or a group name can be given for <n>. Curly brackets are -required only if the following character would be interpreted as part of the -number or name. The number may be zero to include the entire matched string. -For example, if the pattern a(b)c is matched with "=abc=" and the replacement -string "+$1$0$1+", the result is "=+babcb+=". +Either a group number or a group name can be given for n, for example $2 or +$NAME. Curly brackets are required only if the following character would be +interpreted as part of the number or name. The number may be zero to include +the entire matched string. For example, if the pattern a(b)c is matched with +"=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=". +

    +

    +The JavaScript form $<name>, where the angle brackets are part of the syntax, +is also recognized for group names, but not for group numbers or *MARK.

    $*MARK inserts the name from the last encountered backtracking control verb on @@ -3732,28 +3907,53 @@ not influence the extended substitution syntax described below. PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the replacement string. Without this option, only the dollar character is special, and only the group insertion forms listed above are valid. When -PCRE2_SUBSTITUTE_EXTENDED is set, two things change: +PCRE2_SUBSTITUTE_EXTENDED is set, several things change:

    Firstly, backslash in a replacement string is interpreted as an escape -character. The usual forms such as \n or \x{ddd} can be used to specify -particular character codes, and backslash followed by any non-alphanumeric -character quotes that character. Extended quoting can be coded using \Q...\E, -exactly as in pattern strings. +character. The usual forms such as \x{ddd} can be used to specify particular +character codes, and backslash followed by any non-alphanumeric character +quotes that character. Extended quoting can be coded using \Q...\E, exactly +as in pattern strings. The escapes \b and \v are interpreted as the +characters backspace and vertical tab, respectively. +

    +

    +The interpretation of backslash followed by one or more digits is the same as +in a pattern, which in Perl has some ambiguities. Details are given in the +pcre2pattern +page. +

    +

    +The Python form \g<n>, where the angle brackets are part of the syntax and n +is either a group name or number, is recognized as an alternative way of +inserting the contents of a group, for example \g<3>.

    There are also four escape sequences for forcing the case of inserted letters. -The insertion mechanism has three states: no case forcing, force upper case, -and force lower case. The escape sequences change the current state: \U and -\L change to upper or lower case forcing, respectively, and \E (when not -terminating a \Q quoted sequence) reverts to no case forcing. The sequences -\u and \l force the next character (if it is a letter) to upper or lower -case, respectively, and then the state automatically reverts to no case -forcing. Case forcing applies to all inserted characters, including those from -capture groups and letters within \Q...\E quoted sequences. If either -PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode +Case forcing applies to all inserted characters, including those from capture +groups and letters within \Q...\E quoted sequences. The insertion mechanism +has three states: no case forcing, force upper case, and force lower case. The +escape sequences change the current state: \U and \L change to upper or lower +case forcing, respectively, and \E (when not terminating a \Q quoted +sequence) reverts to no case forcing. The sequences \u and \l force the next +character (if it is a letter) to upper or lower case, respectively, and then +the state automatically reverts to no case forcing. +

    +

    +However, if \u is immediately followed by \L or \l is immediately followed +by \U, the next character's case is forced by the first escape sequence, and +subsequent characters by the second. This provides a "title casing" facility +that can be applied to group captures. For example, if group 1 has captured +"heLLo", the replacement string "\u\L$1" becomes "Hello". +

    +

    +If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater -than 127. +than 127. However, only simple case folding, as determined by the Unicode file +CaseFolding.txt, is supported. PCRE2 does not support language-specific +special casing rules such as using different lower case Greek sigmas in the +middle and ends of words (as defined in the Unicode file +SpecialCasing.txt).

    Note that case forcing sequences such as \U...\E do not nest. For example, @@ -3762,20 +3962,20 @@ effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do not apply to replacement strings.

    -The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more flexibility to capture group substitution. The syntax is similar to that used by Bash:

    -  ${<n>:-<string>}
    -  ${<n>:+<string1>:<string2>}
    +  ${n:-string}
    +  ${n:+string1:string2}
     
    -As before, <n> may be a group number or a name. The first form specifies a -default value. If group <n> is set, its value is inserted; if not, <string> is -expanded and the result inserted. The second form specifies strings that are -expanded and inserted when group <n> is set or unset, respectively. The first -form is just a convenient shorthand for +As in the simple case, n may be a group number or a name. The first form +specifies a default value. If group n is set, its value is inserted; if +not, the string is expanded and the result inserted. The second form specifies +strings that are expanded and inserted when group n is set or unset, +respectively. The first form is just a convenient shorthand for
    -  ${<n>:+${<n>}:<string>}
    +  ${n:+${n}:string}
     
    Backslash can be used to escape colons and closing curly brackets in the replacement strings. A change of the case forcing state within a replacement @@ -3852,9 +4052,18 @@ Substitution callouts The pcre2_set_substitute_callout() function can be used to specify a callout function for pcre2_substitute(). This information is passed in a match context. The callout function is called after each substitution has -been processed, but it can cause the replacement not to happen. The callout -function is not called for simulated substitutions that happen as a result of -the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. +been processed, but it can cause the replacement not to happen. +

    +

    +The callout function is not called for simulated substitutions that happen as a +result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In this mode, when +substitution processing exceeds the buffer space provided by the caller, +processing continues by counting code units. The simulation is unable to +populate the callout block, and so the simulation is pessimistic about the +required buffer size. Whichever is larger of accepted or rejected substitution +is reported as the required size. Therefore, the returned buffer length may be +an overestimate (without a substitution callout, it is normally an exact +measurement).

    The first argument of the callout function is a pointer to a substitute callout @@ -3903,6 +4112,107 @@ PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the output and the call to pcre2_substitute() exits, returning the number of matches so far.

    +
    +Substitution case callouts +
    +

    +int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +
    +
    +The pcre2_set_substitute_case_callout() function can be used to specify +a callout function for pcre2_substitute() to use when performing case +transformations. This does not affect any case insensitivity behaviour when +performing a match, but only the user-visible transformations performed when +processing a substitution such as: +

    +    pcre2_substitute(..., "\\U$1", ...)
    +
    +

    +

    +The default case transformations applied by PCRE2 are reasonably complete, and, +in UTF or UCP mode, perform the simple locale-invariant case transformations as +specified by Unicode. This is suitable for the internal (invisible) +case-equivalence procedures used during pattern matching, but an application +may wish to use more sophisticated locale-aware processing for the user-visible +substitution transformations. +

    +

    +One example implementation of the callout_function using the ICU +library would be: +
    +
    +

    +    PCRE2_SIZE
    +    icu_case_callout(
    +      PCRE2_SPTR input, PCRE2_SIZE input_len,
    +      PCRE2_UCHAR *output, PCRE2_SIZE output_cap,
    +      int to_case, void *data_ptr)
    +    {
    +      UErrorCode err = U_ZERO_ERROR;
    +      int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER
    +        ? u_strToLower(output, output_cap, input, input_len, NULL, &err)
    +        : to_case == PCRE2_SUBSTITUTE_CASE_UPPER
    +        ? u_strToUpper(output, output_cap, input, input_len, NULL, &err)
    +        : u_strToTitle(output, output_cap, input, input_len, &first_char_only,
    +                       NULL, &err);
    +      if (U_FAILURE(err)) return (~(PCRE2_SIZE)0);
    +      return r;
    +    }
    +
    +

    +

    +The first and second arguments of the case callout function are the Unicode +string to transform. +

    +

    +The third and fourth arguments are the output buffer and its capacity. +

    +

    +The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER, +PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST. +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase +and the rest to Unicode lowercase (note that titlecasing sometimes uses Unicode +properties to titlecase each word in a string; but PCRE2 is requesting that only +the single leading character is to be titlecased). +

    +

    +The sixth argument is the callout_data supplied to +pcre2_set_substitute_case_callout(). +

    +

    +The resulting string in the destination buffer may be larger or smaller than the +input, if the casing rules merge or split characters. The return value is the +length required for the output string. If a buffer of sufficient size was +provided to the callout, then the result must be written to the buffer and the +number of code units returned. If the result does not fit in the provided +buffer, then the required capacity must be returned and PCRE2 will not make use +of the output buffer. PCRE2 provides input and output buffers which overlap, so +the callout must support this by suitable internal buffering. +

    +

    +Alternatively, if the callout wishes to indicate an error, then it may return +(~(PCRE2_SIZE)0). In this case pcre2_substitute() will immediately fail with +error PCRE2_ERROR_REPLACECASE. +

    +

    +When a case callout is combined with the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH +option, there are situations when pcre2_substitute() will return an +underestimate of the required buffer size. If you call pcre2_substitute() once +with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the input buffer is too small for +the replacement string to be constructed, then instead of calling the case +callout, pcre2_substitute() will make an estimate of the required buffer size. +The second call should also pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that +second call is not guaranteed to succeed either, if the case callout requires +more buffer space than expected. The caller must make repeated attempts in a +loop. +


    DUPLICATE CAPTURE GROUP NAMES

    int pcre2_substring_nametable_scan(const pcre2_code *code, @@ -4177,7 +4487,7 @@ Cambridge, England.


    REVISION

    -Last updated: 24 April 2024 +Last updated: 26 December 2024
    Copyright © 1997-2024 University of Cambridge.
    diff --git a/doc/html/pcre2build.html b/doc/html/pcre2build.html index d4b0d33..f4e127f 100644 --- a/doc/html/pcre2build.html +++ b/doc/html/pcre2build.html @@ -643,7 +643,7 @@ Cambridge, England.


    REVISION

    -Last updated: 15 April 2024 +Last updated: 16 April 2024
    Copyright © 1997-2024 University of Cambridge.
    diff --git a/doc/html/pcre2compat.html b/doc/html/pcre2compat.html index d60182e..5f7e280 100644 --- a/doc/html/pcre2compat.html +++ b/doc/html/pcre2compat.html @@ -71,7 +71,7 @@ interprets them. 7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is built with Unicode support (the default). The properties that can be tested with \p and \P are limited to the general category properties such as Lu and -Nd, the derived properties Any and LC (synonym L&), script names such as Greek +Nd, the derived properties Any and Lc (synonym L&), script names such as Greek or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See the @@ -99,7 +99,12 @@ following examples: \Q\\E \ \\E The \Q...\E sequence is recognized both inside and outside character classes -by both PCRE2 and Perl. +by both PCRE2 and Perl. Another difference from Perl is that any appearance of +\Q or \E inside what might otherwise be a quantifier causes PCRE2 not to +recognize the sequence as a quantifier. Perl recognizes a quantifier if +(redundantly) either of the numbers is inside \Q...\E, but not if the +separating comma is. When not recognized as a quantifier a sequence such as +{\Q1\E,2} is treated as the literal string "{1,2}".

    9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) @@ -120,7 +125,9 @@ confined to that group; it does not extend to the surrounding pattern. This is not always the case in Perl. In particular, if (*THEN) is present in a group that is called as a subroutine, its action is limited to that group, even if the group does not contain any | characters. Note that such groups are -processed as anchored at the point where they are tested. +processed as anchored at the point where they are tested. PCRE2 also confines +all control verbs within atomic assertions, again including (*THEN) in +assertions with only one branch.

    12. If a pattern contains more than one backtracking control verb, the first @@ -159,11 +166,11 @@ warning features, so it gives an error in these cases because they are almost certainly user mistakes.

    -17. In PCRE2, the upper/lower case character properties Lu and Ll are not -affected when case-independent matching is specified. For example, \p{Lu} -always matches an upper case letter. I think Perl has changed in this respect; -in the release at the time of writing (5.38), \p{Lu} and \p{Ll} match all -letters, regardless of case, when case independence is specified. +17. In PCRE2, until release 10.45, the upper/lower case character properties Lu +and Ll were not affected when case-independent matching was specified. Perl has +changed in this respect, and PCRE2 has now changed to match. When caseless +matching is in force, Lu, Ll, and Lt (title case) are all treated as Lc (cased +letter).

    18. From release 5.32.0, Perl locks out the use of \K in lookaround @@ -231,6 +238,10 @@ and condition references such as (?(4)...). PCRE2 supports relative group numbers such as +2 and -4 in all three cases. Perl supports both plus and minus for subroutine calls, but only minus for back references, and no relative numbering at all for conditions. +
    +
    +(m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 extension +that is not available in Perl.

    20. Perl has different limits than PCRE2. See the @@ -252,6 +263,18 @@ handled by PCRE2, either by the interpreter or the JIT. An example is /(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number of repeated "abcd" substrings at the end of the subject.

    +

    +23. Both PCRE2 and Perl error when \x{ escapes are invalid, but Perl tries to +recover and prints a warning if the problem was that an invalid hexadecimal +digit was found. Since PCRE2 doesn't have warnings, it returns an error instead. +Additionally, Perl accepts \x{} and generates NUL, unlike PCRE2. +

    +

    +24. From release 10.45, PCRE2 gives an error if \x is not followed by a +hexadecimal digit or a curly bracket. It used to interpret this as the NUL +character. Perl still generates NUL, but warns when in warning mode in most +cases. +


    AUTHOR
    @@ -267,9 +290,9 @@ Cambridge, England. REVISION

    -Last updated: 30 November 2023 +Last updated: 02 October 2024
    -Copyright © 1997-2023 University of Cambridge. +Copyright © 1997-2024 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/doc/html/pcre2convert.html b/doc/html/pcre2convert.html index 6b9fea5..57e8989 100644 --- a/doc/html/pcre2convert.html +++ b/doc/html/pcre2convert.html @@ -182,7 +182,7 @@ Cambridge, England.


    REVISION

    -Last updated: 28 June 2018 +Last updated: 14 November 2023
    Copyright © 1997-2018 University of Cambridge.
    diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html index bd12246..5c9a57a 100644 --- a/doc/html/pcre2grep.html +++ b/doc/html/pcre2grep.html @@ -391,9 +391,10 @@ Read patterns from the file, one per line. As is the case with patterns on the command line, no delimiters should be used. What constitutes a newline when reading the file is the operating system's default interpretation of \n. The --newline option has no effect on this option. Trailing white space is -removed from each line, and blank lines are ignored. An empty file contains no +removed from each line, and blank lines are ignored unless the +--posix-pattern-file option is also provided. An empty file contains no patterns and therefore matches nothing. Patterns read from a file in this way -may contain binary zeros, which are treated as ordinary data characters. +may contain binary zeros, which are treated as ordinary character literals.

    If this option is given more than once, all the specified files are read. A @@ -723,9 +724,9 @@ text.

    $<digits> or ${<digits>} is replaced by the captured substring of the given -decimal number; zero substitutes the whole match. If the number is greater than -the number of capturing substrings, or if the capture is unset, the replacement -is empty. +decimal number; $& (or the legacy $0) substitutes the whole match. If the +number is greater than the number of capturing substrings, or if the capture +is unset, the replacement is empty.

    $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by @@ -808,6 +809,15 @@ when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while allowing \w to match Unicode letters and digits.

    +--posix-pattern-file +When patterns are provided with the -f option, do not trim trailing +spaces or ignore empty lines in a similar way to other grep tools. To keep +the behaviour consistent with older versions, if the pattern read was +terminated with CRLF (as character literals) then both characters won't be +included as part of it, so if you really need to have a pattern ending in '\r', +use an escape sequence or provide it by a different method. +

    +

    -q, --quiet Work quietly, that is, display nothing except error messages. The exit status indicates whether or not any matches were found. @@ -993,7 +1003,7 @@ scripts or echoing specific strings during matching by making use of PCRE2's callout facility. However, this support can be completely or partially disabled when pcre2grep is built. You can find out whether your binary has support for callouts by running it with the --help option. If callout support is -completely disabled, all callouts in patterns are ignored by pcre2grep. +completely disabled, callouts in patterns are forbidden by pcre2grep. If the facility is partially disabled, calling external programs is not supported, and callouts that request it are ignored.

    @@ -1015,9 +1025,9 @@ available, provided that callouts were not completely disabled when zero-terminated string, which means it should not contain any internal binary zeros. It is written to the output, having first been passed through the same escape processing as text from the --output (-O) option (see -above). However, $0 cannot be used to insert a matched substring because the -match is still in progress. Instead, the single character '0' is inserted. Any -syntax errors in the string (for example, a dollar not followed by another +above). However, $0 or $& cannot be used to insert a matched substring because +the match is still in progress. Instead, the single character '0' is inserted. +Any syntax errors in the string (for example, a dollar not followed by another character) causes the callout to be ignored. No terminator is added to the output string, so if you want a newline, you must include it explicitly using the escape $n. For example: @@ -1047,9 +1057,9 @@ arguments: Any substring (including the executable name) may contain escape sequences started by a dollar character. These are the same as for the --output -(-O) option documented above, except that $0 cannot insert the matched -string because the match is still in progress. Instead, the character '0' -is inserted. If you need a literal dollar or pipe character in any +(-O) option documented above, except that $0 or $& cannot insert the +matched string because the match is still in progress. Instead, the character +'0' is inserted. If you need a literal dollar or pipe character in any substring, use $$ or $| respectively. Here is an example:
       echo -e "abcde\n12345" | pcre2grep \
    @@ -1116,7 +1126,7 @@ Cambridge, England.
     


    REVISION

    -Last updated: 22 December 2023 +Last updated: 09 October 2024
    Copyright © 1997-2023 University of Cambridge.
    diff --git a/doc/html/pcre2jit.html b/doc/html/pcre2jit.html index d97d800..6835cd8 100644 --- a/doc/html/pcre2jit.html +++ b/doc/html/pcre2jit.html @@ -64,7 +64,7 @@ platforms: If --enable-jit is set on an unsupported platform, compilation fails.

    -A client program can tell if JIT support is available by calling +A client program can tell if JIT support has been compiled by calling pcre2_config() with the PCRE2_CONFIG_JIT option. The result is one if PCRE2 was built with JIT support, and zero otherwise. However, having the JIT code available does not guarantee that it will be used for any particular @@ -72,11 +72,19 @@ match. One reason for this is that there are a number of options and pattern items that are not supported by JIT (see below). Another reason is that in some environments JIT is unable to get -memory in which to build its compiled code. The only guarantee from +executable memory in which to build its compiled code. The only guarantee from pcre2_config() is that if it returns zero, JIT will definitely not be used.

    +As of release 10.45 there is a more informative way to test for JIT support. If +pcre2_jit_compile() is called with the single option PCRE2_JIT_TEST_ALLOC +it returns zero if JIT is available and has a working allocator. Otherwise it +returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate executable +memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not compiled. The +code argument is ignored, so it can be a NULL value. +

    +

    A simple program does not need to check availability in order to use JIT when possible. The API is implemented in a way that falls back to the interpretive code if JIT is not available or cannot be used for a given match. For programs @@ -126,7 +134,8 @@ option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore PCRE2_JIT_COMPLETE and just compile code for partial matching. If pcre2_jit_compile() is called with no option bits set, it immediately -returns zero. This is an alternative way of testing whether JIT is available. +returns zero. This is an alternative way of testing whether JIT support has +been compiled.

    At present, it is not possible to free JIT compiled code except when the entire @@ -487,7 +496,7 @@ Cambridge, England.


    REVISION

    -Last updated: 21 February 2024 +Last updated: 22 August 2024
    Copyright © 1997-2024 University of Cambridge.
    diff --git a/doc/html/pcre2limits.html b/doc/html/pcre2limits.html index 8152ed2..514c50b 100644 --- a/doc/html/pcre2limits.html +++ b/doc/html/pcre2limits.html @@ -96,7 +96,7 @@ Cambridge, England. REVISION

    -Last updated: August 2023 +Last updated: 16 August 2023
    Copyright © 1997-2023 University of Cambridge.
    diff --git a/doc/html/pcre2matching.html b/doc/html/pcre2matching.html index 3b8b629..4d02325 100644 --- a/doc/html/pcre2matching.html +++ b/doc/html/pcre2matching.html @@ -27,7 +27,7 @@ please consult the man page, in case the conversion went wrong. This document describes the two different algorithms that are available in PCRE2 for matching a compiled regular expression against a given subject string. The "standard" algorithm is the one provided by the pcre2_match() -function. This works in the same as Perl's matching function, and provide a +function. This works in the same way as Perl's matching function, and provides a Perl-compatible matching operation. The just-in-time (JIT) optimization that is described in the pcre2jit @@ -42,7 +42,7 @@ these are described below.

    When there is only one possible way in which a given subject string can match a pattern, the two algorithms give the same answer. A difference arises, however, -when there are multiple possibilities. For example, if the pattern +when there are multiple possibilities. For example, if the anchored pattern

       ^<.*>
     
    @@ -115,9 +115,9 @@ algorithm after the first match (which is necessarily the shortest) is found.

    Note that the size of vector needed to contain all the results depends on the -number of simultaneous matches, not on the number of parentheses in the -pattern. Using pcre2_match_data_create_from_pattern() to create the match -data block is therefore not advisable when doing DFA matching. +number of simultaneous matches, not on the number of capturing parentheses in +the pattern. Using pcre2_match_data_create_from_pattern() to create the +match data block is therefore not advisable when doing DFA matching.

    Note also that all the matches that are found start at the same point in the @@ -166,37 +166,43 @@ possibilities, and PCRE2's implementation of this algorithm does not attempt to do this. This means that no captured substrings are available.

    -3. Because no substrings are captured, backreferences within the pattern are -not supported. -

    -

    -4. For the same reason, conditional expressions that use a backreference as the -condition or test for a specific group recursion are not supported. -

    -

    -5. Again for the same reason, script runs are not supported. +3. Because no substrings are captured, a number of related features are not +available: +
    +
    +(a) Backreferences; +
    +
    +(b) Conditional expressions that use a backreference as the condition or test +for a specific group recursion; +
    +
    +(c) Script runs; +
    +
    +(d) Scan substring assertions.

    -6. Because many paths through the tree may be active, the \K escape sequence, +4. Because many paths through the tree may be active, the \K escape sequence, which resets the start of the match when encountered (but may be on some paths and not on others), is not supported.

    -7. Callouts are supported, but the value of the capture_top field is +5. Callouts are supported, but the value of the capture_top field is always 1, and the value of the capture_last field is always 0.

    -8. The \C escape sequence, which (in the standard algorithm) always matches a -single code unit, even in a UTF mode, is not supported in these modes, because +6. The \C escape sequence, which (in the standard algorithm) always matches a +single code unit, even in a UTF mode, is not supported in UTF modes because the alternative algorithm moves through the subject string one character (not code unit) at a time, for all active paths through the tree.

    -9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not +7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not supported. (*FAIL) is supported, and behaves like a failing negative assertion.

    -10. The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not +8. The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not supported by pcre2_dfa_match().


    ADVANTAGES OF THE ALTERNATIVE ALGORITHM
    @@ -223,15 +229,18 @@ because it has to search for all possible matches, but is also because it is less susceptible to optimization.

    -2. Capturing parentheses, backreferences, script runs, and matching within -invalid UTF string are not supported. +2. Capturing parentheses and other features such as backreferences that rely on +them are not supported. +

    +

    +3. Matching within invalid UTF strings is not supported.

    -3. Although atomic groups are supported, their use does not provide the +4. Although atomic groups are supported, their use does not provide the performance advantage that it does for the standard algorithm.

    -4. JIT optimization is not supported. +5. JIT optimization is not supported.


    AUTHOR

    @@ -244,7 +253,7 @@ Cambridge, England.


    REVISION

    -Last updated: 19 January 2024 +Last updated: 30 August 2024
    Copyright © 1997-2024 University of Cambridge.
    diff --git a/doc/html/pcre2partial.html b/doc/html/pcre2partial.html index 64116c4..067064d 100644 --- a/doc/html/pcre2partial.html +++ b/doc/html/pcre2partial.html @@ -399,7 +399,7 @@ Cambridge, England.


    REVISION

    -Last updated: 04 September 2019 +Last updated: 27 November 2024
    Copyright © 1997-2019 University of Cambridge.
    diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html index cf50c1a..84eb0aa 100644 --- a/doc/html/pcre2pattern.html +++ b/doc/html/pcre2pattern.html @@ -14,37 +14,41 @@ please consult the man page, in case the conversion went wrong.


    PCRE2 REGULAR EXPRESSION DETAILS

    @@ -52,9 +56,11 @@ The syntax and semantics of the regular expressions that are supported by PCRE2 are described in detail below. There is a quick-reference syntax summary in the pcre2syntax page. PCRE2 tries to match Perl syntax and semantics as closely as it can. -PCRE2 also supports some alternative regular expression syntax (which does not -conflict with the Perl syntax) in order to provide some compatibility with -regular expressions in Python, .NET, and Oniguruma. +PCRE2 also supports some alternative regular expression syntax that does not +conflict with the Perl syntax in order to provide some compatibility with +regular expressions in Python, .NET, and Oniguruma. There are in addition some +options that enable alternative syntax and semantics that are not the same as +in Perl.

    Perl's regular expressions are described in its own documentation, and regular @@ -74,7 +80,19 @@ function, are discussed in the pcre2matching page.

    -
    SPECIAL START-OF-PATTERN ITEMS
    +
    EBCDIC CHARACTER CODES
    +

    +Most computers use ASCII or Unicode for encoding characters, and PCRE2 assumes +this by default. However, it can be compiled to run in an environment that uses +the EBCDIC code, which is the case for some IBM mainframe operating systems. In +the sections below, character code values are ASCII or Unicode; in an EBCDIC +environment these characters may have different code values, and there are no +code points greater than 255. Differences in behaviour when PCRE2 is running in +an EBCDIC environment are described in the section +"EBCDIC environments" +below, which you can ignore unless you really are in an EBCDIC environment. +

    +
    SPECIAL START-OF-PATTERN ITEMS

    A number of options that can be passed to pcre2_compile() can also be set by special items at the start of a pattern. These are not Perl-compatible, but @@ -141,7 +159,8 @@ Disabling auto-possessification

    If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting -the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making quantifiers +the PCRE2_NO_AUTO_POSSESS option, or calling pcre2_set_optimize() with +a PCRE2_AUTO_POSSESS_OFF directive. This stops PCRE2 from making quantifiers possessive when what follows cannot match the repeated item. For example, by default a+b is treated as a++b. For more details, see the pcre2api @@ -152,8 +171,9 @@ Disabling start-up optimizations

    If a pattern starts with (*NO_START_OPT), it has the same effect as setting the -PCRE2_NO_START_OPTIMIZE option. This disables several optimizations for quickly -reaching "no match" results. For more details, see the +PCRE2_NO_START_OPTIMIZE option, or calling pcre2_set_optimize() with +a PCRE2_START_OPTIMIZE_OFF directive. This disables several optimizations for +quickly reaching "no match" results. For more details, see the pcre2api documentation.

    @@ -162,7 +182,8 @@ Disabling automatic anchoring

    If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect as -setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimizations that +setting the PCRE2_NO_DOTSTAR_ANCHOR option, or calling pcre2_set_optimize() +with a PCRE2_DOTSTAR_ANCHOR_OFF directive. This disables optimizations that apply to patterns whose top-level branches all start with .* (match any number of arbitrary characters). For more details, see the pcre2api @@ -275,14 +296,6 @@ at compile time. This effect can also be achieved by starting a pattern with (*BSR_ANYCRLF). For completeness, (*BSR_UNICODE) is also recognized, corresponding to PCRE2_BSR_UNICODE.

    -
    EBCDIC CHARACTER CODES
    -

    -PCRE2 can be compiled to run in an environment that uses EBCDIC as its -character code instead of ASCII or Unicode (typically a mainframe system). In -the sections below, character code values are ASCII or Unicode; in an EBCDIC -environment these characters may have different code values, and there are no -code points greater than 255. -


    CHARACTERS AND METACHARACTERS

    A regular expression is a pattern that is matched against a subject string from @@ -298,7 +311,10 @@ ASCII characters, K and S, that, in addition to their lower case ASCII equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) respectively when either PCRE2_UTF or PCRE2_UCP is set, unless the PCRE2_EXTRA_CASELESS_RESTRICT option is in force (either passed to -pcre2_compile() or set by (?r) within the pattern). +pcre2_compile() or set by (*CASELESS_RESTRICT) or (?r) within the +pattern). If the PCRE2_EXTRA_TURKISH_CASING option is in force (either passed +to pcre2_compile() or set by (*TURKISH_CASING) within the pattern), then +the 'i' letters are matched according to the casing rules of the Turkish and Azeri languages.

    The power of regular expressions comes from the ability to include wild cards, @@ -346,7 +362,7 @@ a character class the only metacharacters are:

    If a pattern is compiled with the PCRE2_EXTENDED option, most white space in the pattern, other than in a character class, within a \Q...\E sequence, or -between a # outside a character class and the next newline, inclusive, are +between a # outside a character class and the next newline, inclusive, is ignored. An escaping backslash can be used to include a white space or a # character as part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same applies, but in addition unescaped space and horizontal tab characters are @@ -404,6 +420,14 @@ by \E later in the pattern, the literal interpretation continues to the end of the pattern (that is, \E is assumed at the end). If the isolated \Q is inside a character class, this causes an error, because the character class is then not terminated by a closing square bracket. +

    +

    +Another difference from Perl is that any appearance of \Q or \E inside what +might otherwise be a quantifier causes PCRE2 not to recognize the sequence as a +quantifier. Perl recognizes a quantifier if (redundantly) either of the numbers +is inside \Q...\E, but not if the separating comma is. When not recognized as +a quantifier a sequence such as {\Q1\E,2} is treated as the literal string +"{1,2}".


    Non-printing characters @@ -424,17 +448,28 @@ environment, these escapes are as follows: \r carriage return (hex 0D) (but see below) \t tab (hex 09) \0dd character with octal code 0dd - \ddd character with octal code ddd, or backreference + \ddd character with octal code ddd, or back reference \o{ddd..} character with octal code ddd.. \xhh character with hex code hh \x{hhh..} character with hex code hhh.. \N{U+hhh..} character with Unicode hex code point hhh.. -By default, after \x that is not followed by {, from zero to two hexadecimal -digits are read (letters can be in upper or lower case). Any number of -hexadecimal digits may appear between \x{ and }. If a character other than a -hexadecimal digit appears between \x{ and }, or if there is no terminating }, -an error occurs. +A description of how back references work is given +later, +following the discussion of +parenthesized groups. +

    +

    +By default, after \x that is not followed by {, one or two hexadecimal +digits are read (letters can be in upper or lower case). If the character that +follows \x is neither { nor a hexadecimal digit, an error occurs. This is +different from Perl's default behaviour, which generates a NUL character, but +is in line with the behaviour of Perl's 'strict' mode in re. +

    +

    +Any number of hexadecimal digits may appear between \x{ and }. If a character +other than a hexadecimal digit appears between \x{ and }, or if there is no +terminating }, an error occurs.

    Characters whose code points are less than 256 can be defined by either of the @@ -481,69 +516,54 @@ the code unit following \c has a code point less than 32 or greater than 126, a compile-time error occurs.

    -When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. \a, \e, -\f, \n, \r, and \t generate the appropriate EBCDIC code values. The \c -escape is processed as specified for Perl in the perlebcdic document. The -only characters that are allowed after \c are A-Z, a-z, or one of @, [, \, ], -^, _, or ?. Any other character provokes a compile-time error. The sequence -\c@ encodes character code 0; after \c the letters (in either case) encode -characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 -(hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F). +For differences in the way some escapes behave in EBCDIC environments, +see section +"EBCDIC environments" +below.

    +
    +Octal escapes and back references +

    -Thus, apart from \c?, these escapes generate the same character code values as -they do in an ASCII environment, though the meanings of the values mostly -differ. For example, \cG always generates code value 7, which is BEL in ASCII -but DEL in EBCDIC. +The escape \o must be followed by a sequence of octal digits, enclosed in +braces. An error occurs if this is not the case. This escape provides a way of +specifying character code points as octal numbers greater than 0777, and it +also allows octal numbers and backreferences to be unambiguously distinguished.

    -The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but -because 127 is not a control character in EBCDIC, Perl makes it generate the -APC character. Unfortunately, there are several variants of EBCDIC. In most of -them the APC character has the value 255 (hex FF), but in the one Perl calls -POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC -values, PCRE2 makes \c? generate 95; otherwise it generates 255. +If braces are not used, after \0 up to two further octal digits are read. +However, if the PCRE2_EXTRA_NO_BS0 option is set, at least one more octal digit +must follow \0 (use \00 to generate a NUL character). Make sure you supply +two digits after the initial zero if the pattern character that follows is +itself an octal digit.

    -After \0 up to two further octal digits are read. If there are fewer than two -digits, just those that are present are used. Thus the sequence \0\x\015 -specifies two binary zeros followed by a CR character (code value 13). Make -sure you supply two digits after the initial zero if the pattern character that -follows is itself an octal digit. +Inside a character class, when a backslash is followed by any octal digit, up +to three octal digits are read to generate a code point. Any subsequent digits +stand for themselves. The sequences \8 and \9 are treated as the literal +characters "8" and "9".

    -The escape \o must be followed by a sequence of octal digits, enclosed in -braces. An error occurs if this is not the case. This escape is a recent -addition to Perl; it provides way of specifying character code points as octal -numbers greater than 0777, and it also allows octal numbers and backreferences -to be unambiguously specified. +Outside a character class, Perl's handling of a backslash followed by a digit +other than 0 is complicated by ambiguity, and Perl has changed over time, +causing PCRE2 also to change. From PCRE2 release 10.45 there is an option +called PCRE2_EXTRA_PYTHON_OCTAL that causes PCRE2 to use Python's unambiguous +rules. The next two subsections describe the two sets of rules.

    For greater clarity and unambiguity, it is best to avoid following \ by a digit greater than zero. Instead, use \o{...} or \x{...} to specify numerical -character code points, and \g{...} to specify backreferences. The following -paragraphs describe the old, ambiguous syntax. -

    -

    -The handling of a backslash followed by a digit other than 0 is complicated, -and Perl has changed over time, causing PCRE2 also to change. -

    -

    -Outside a character class, PCRE2 reads the digit and any following digits as a -decimal number. If the number is less than 10, begins with the digit 8 or 9, or -if there are at least that many previous capture groups in the expression, the -entire sequence is taken as a backreference. A description of how this -works is given -later, -following the discussion of -parenthesized groups. -Otherwise, up to three octal digits are read to form a character code. +character code points, and \g{...} to specify backreferences.

    +
    +Perl rules for non-class backslash 1-9 +

    -Inside a character class, PCRE2 handles \8 and \9 as the literal characters -"8" and "9", and otherwise reads up to three octal digits following the -backslash, using them to generate a data character. Any subsequent digits stand -for themselves. For example, outside a character class: +All the digits that follow the backslash are read as a decimal number. If the +number is less than 10, begins with the digit 8 or 9, or if there are at least +that many previous capture groups in the expression, the entire sequence is +taken as a back reference. Otherwise, up to three octal digits are read to form +a character code. For example:

       \040   is another way of writing an ASCII space
       \40    is the same, provided there are fewer than 40 previous capture groups
    @@ -560,6 +580,19 @@ must not be introduced by a leading zero, because no more than three octal
     digits are ever read.
     


    +Python rules for non-class backslash 1-9 +
    +

    +If there are at least three octal digits after the backslash, exactly three are +read as an octal code point number, but the value must be no greater than +\377, even in modes where higher code point values are supported. Any +subsequent digits stand for themselves. If there are fewer than three octal +digits, the sequence is taken as a decimal back reference. Thus, for example, +\12 is always a back reference, independent of how many captures there are in +the pattern. An error is generated for a reference to a non-existent capturing +group. +

    +
    Constraints on character values

    @@ -805,7 +838,7 @@ When PCRE2 is built with Unicode support (the default), three additional escape sequences that match characters with specific properties are available. They can be used in any mode, though in 8-bit and 16-bit non-UTF modes these sequences are of course limited to testing characters whose code points are -less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points +less than U+0100 or U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all treated as being in the Unknown script and with an unassigned type.

    @@ -823,12 +856,33 @@ The extra escape sequences that provide property support are: \P{xx} a character without the xx property \X a Unicode extended grapheme cluster
    -The property names represented by xx above are not case-sensitive, and in -accordance with Unicode's "loose matching" rules, spaces, hyphens, and -underscores are ignored. There is support for Unicode script names, Unicode -general category properties, "Any", which matches any character (including -newline), Bidi_Class, a number of binary (yes/no) properties, and some special -PCRE2 properties (described +For compatibility with Perl, negation can be specified by including a +circumflex between the opening brace and the property. For example, \p{^Lu} is +the same as \P{Lu}. +

    +

    +In accordance with Unicode's "loose matching" rules, ASCII white space +characters, hyphens, and underscores are ignored in the properties represented +by xx above. As well as the space character, ASCII white space can be +tab, linefeed, vertical tab, formfeed, or carriage return. +

    +

    +Some properties are specified as a name only; others as a name and a value, +separated by a colon or an equals sign. The names and values consist of ASCII +letters and digits (with one Perl-specific exception, see below). They are not +case sensitive. Note, however, that the escapes themselves, \p and \P, +are case sensitive. There are abbreviations for many names. The following +examples are all equivalent: +

    +  \p{bidiclass=al}
    +  \p{BC=al}
    +  \p{ Bidi_Class : AL }
    +  \p{ Bi-di class = Al }
    +  \P{ ^ Bi-di class = Al }
    +
    +There is support for Unicode script names, Unicode general category properties, +"Any", which matches any character (including newline), Bidi_Class, a number of +binary (yes/no) properties, and some special PCRE2 properties (described below). Certain other Perl properties such as "InMusicalSymbols" are not supported by PCRE2. Note that \P{Any} does not match any characters, so always causes a @@ -844,10 +898,11 @@ Extensions") with which it is commonly used. Using the Adlam script as an example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas \p{scx:Adlam} matches, in addition, characters that have Adlam in their extensions list. The full names "script" and "script extensions" for the -property types are recognized, and a equals sign is an alternative to the -colon. If a script name is given without a property type, for example, -\p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this -interpretation at release 5.26 and PCRE2 changed at release 10.40. +property types are recognized and, as for all property specifications, an +equals sign is an alternative to the colon. If a script name is given without a +property type, for example, \p{Adlam}, it is treated as \p{scx:Adlam}. Perl +changed to this interpretation at release 5.26 and PCRE2 changed at release +10.40.

    Unassigned characters (and in non-UTF 32-bit mode, characters with code points @@ -865,15 +920,10 @@ The general category property for \p and \P

    Each character has exactly one Unicode general category property, specified by -a two-letter abbreviation. For compatibility with Perl, negation can be -specified by including a circumflex between the opening brace and the property -name. For example, \p{^Lu} is the same as \P{Lu}. -

    -

    -If only one letter is specified with \p or \P, it includes all the general -category properties that start with that letter. In this case, in the absence -of negation, the curly brackets in the escape sequence are optional; these two -examples have the same effect: +a two-letter abbreviation. If only one letter is specified with \p or \P, it +includes all the general category properties that start with that letter. In +this case, in the absence of negation, the curly brackets in the escape +sequence are optional; these two examples have the same effect:

       \p{L}
       \pL
    @@ -888,6 +938,7 @@ The following general category property codes are supported:
       Cs    Surrogate
     
       L     Letter
    +  Lc    Cased letter
       Ll    Lower case letter
       Lm    Modifier letter
       Lo    Other letter
    @@ -924,9 +975,13 @@ The following general category property codes are supported:
       Zp    Paragraph separator
       Zs    Space separator
     
    -The special property LC, which has the synonym L&, is also supported: it -matches a character that has the Lu, Ll, or Lt property, in other words, a -letter that is not classified as a modifier or "other". +Perl originally used the name L& for the Lc property. This is still supported +by Perl, but discouraged. PCRE2 also still supports it. This property matches +any character that has the Lu, Ll, or Lt property, in other words, any letter +that is not classified as a modifier or "other". From release 10.45 of PCRE2 +the properties Lu, Ll, and Lt are all treated as Lc when case-independent +matching is set by the PCRE2_CASELESS option or (?i) within the pattern. The +other properties are not affected by caseless matching.

    The Cs (Surrogate) property applies only to characters whose code points are in @@ -948,11 +1003,6 @@ No character that is in the Unicode table has the Cn (unassigned) property. Instead, this property is assumed for any code point that is not in the Unicode table.

    -

    -Specifying caseless matching does not affect these escape sequences. For -example, \p{Lu} always matches only upper case letters. This is different from -the behaviour of current versions of Perl. -


    Binary (yes/no) properties for \p and \P
    @@ -997,10 +1047,11 @@ The recognized classes are: RLI right-to-left isolate RLO right-to-left override S segment separator - WS which space + WS white space -An equals sign may be used instead of a colon. The class names are -case-insensitive; only the short names listed above are recognized. +As in all property specifications, an equals sign may be used instead of a +colon and the class names are case-insensitive. Only the short names listed +above are recognized; PCRE2 does not at present support any long alternatives.


    Extended grapheme clusters @@ -1073,11 +1124,11 @@ explicitly. These properties are: Xan matches characters that have either the L (letter) or the N (number) property. Xps matches the characters tab, linefeed, vertical tab, form feed, or -carriage return, and any other character that has the Z (separator) property. -Xsp is the same as Xps; in PCRE1 it used to exclude vertical tab, for Perl -compatibility, but Perl changed. Xwd matches the same characters as Xan, plus -those that match Mn (non-spacing mark) or Pc (connector punctuation, which -includes underscore). +carriage return, and any other character that has the Z (separator) property +(this includes the space character). Xsp is the same as Xps; in PCRE1 it used +to exclude vertical tab, for Perl compatibility, but Perl changed. Xwd matches +the same characters as Xan, plus those that match Mn (non-spacing mark) or Pc +(connector punctuation, which includes underscore).

    There is another non-standard property, Xuc, which matches any character that @@ -1389,13 +1440,12 @@ is actually required as a member of the class, ensure it is not the first character, or escape it with a backslash.

    -For example, the character class [aeiou] matches any lower case vowel, while -[^aeiou] matches any character that is not a lower case vowel. Note that a -circumflex is just a convenient notation for specifying the characters that -are in the class by enumerating those that are not. A class that starts with a -circumflex is not an assertion; it still consumes a character from the subject -string, and therefore it fails if the current pointer is at the end of the -string. +For example, the character class [aeiou] matches any lower case English vowel, +whereas [^aeiou] matches all other characters. Note that a circumflex is just a +convenient notation for specifying the characters that are in the class by +enumerating those that are not. A class that starts with a circumflex is not an +assertion; it still consumes a character from the subject string, and therefore +it fails to match if the current pointer is at the end of the string.

    Characters in a class may be specified by their code points using \o, \x, or @@ -1405,7 +1455,10 @@ a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a caseful version would. Note that there are two ASCII characters, K and S, that, in addition to their lower case ASCII equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) -respectively when either PCRE2_UTF or PCRE2_UCP is set. +respectively when either PCRE2_UTF or PCRE2_UCP is set. If you do not want +these ASCII/non-ASCII case equivalences, you can suppress them by setting +PCRE2_EXTRA_CASELESS_RESTRICT, either as an option in a compile context, or by +including (*CASELESS_RESTRICT) or (?r) within a pattern.

    Characters that might indicate line breaks are never treated in any special way @@ -1437,6 +1490,12 @@ or immediately after a range. For example, [b-d-z] matches letters in the range b to d, a hyphen character, or z.

    +There is some special treatment for alphabetic ranges in EBCDIC environments; +see the section +"EBCDIC environments" +below. +

    +

    Perl treats a hyphen as a literal if it appears before or after a POSIX class (see below) or before or after a character type escape such as \d or \H. However, unless the hyphen is the last character in the class, Perl outputs a @@ -1448,9 +1507,9 @@ It is not possible to have the literal character "]" as the end character of a range. A pattern such as [W-]46] is interpreted as a class of two characters ("W" and "-") followed by a literal string "46]", so it would match "W46]" or "-46]". However, if the "]" is escaped with a backslash it is interpreted as -the end of range, so [W-\]46] is interpreted as a class containing a range -followed by two other characters. The octal or hexadecimal representation of -"]" can also be used to end a range. +the end of a range, so [W-\]46] is interpreted as a class containing a range +and two other characters. The octal or hexadecimal representation of "]" can +also be used to end a range.

    Ranges normally include all code points between the start and end characters, @@ -1463,15 +1522,6 @@ this check). However, ranges such as [\x{d7ff}-\x{e000}], which include the surrogates, are always permitted.

    -There is a special case in EBCDIC environments for ranges whose end points are -both specified as literal letters in the same case. For compatibility with -Perl, EBCDIC code points within the range that are not letters are omitted. For -example, [h-k] matches only four characters, even though the codes for h and k -are 0x88 and 0x92, a range of 11 code points. However, if the range is -specified numerically, for example, [\x88-\x92] or [h-\x92], all code points -are included. -

    -

    If a range that includes letters is used when caseless matching is set, it matches the letters in either case. For example, [W-c] is equivalent to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character @@ -1487,18 +1537,132 @@ whereas [\w] includes underscore. A positive character class should be read as something AND NOT ...".

    -The only metacharacters that are recognized in character classes are backslash, -hyphen (only where it can be interpreted as specifying a range), circumflex -(only at the start), opening square bracket (only when it can be interpreted as -introducing a POSIX class name, or for a special compatibility feature - see -the next two sections), and the terminating closing square bracket. However, -escaping other non-alphanumeric characters does no harm. +The metacharacters that are recognized in character classes are backslash, +hyphen (when it can be interpreted as specifying a range), circumflex +(only at the start), and the terminating closing square bracket. An opening +square bracket is also special when it can be interpreted as introducing a +POSIX class (see +"Posix character classes" +below), or a special compatibility feature (see +"Compatibility feature for word boundaries" +below). Escaping any non-alphanumeric character in a class turns it into a +literal, whether or not it would otherwise be a metacharacter. +

    +
    PERL EXTENDED CHARACTER CLASSES
    +

    +From release 10.45 PCRE2 supports Perl's (?[...]) extended character class +syntax. This can be used to perform set operations such as intersection on +character classes. +

    +

    +The syntax permitted within (?[...]) is quite different to ordinary character +classes. Inside the extended class, there is an expression syntax consisting of +"atoms", operators, and ordinary parentheses "()" used for grouping. Such +classes always have the Perl /xx modifier (PCRE2 option PCRE2_EXTENDED_MORE) +turned on within them. This means that literal space and tab characters are +ignored everywhere in the class. +

    +

    +The allowed atoms are individual characters specified by escape sequences such +as \n or \x{123}, character types such as \d, POSIX classes such as +[:alpha:], and nested ordinary (non-extended) character classes. For example, +in (?[\d & [...]]) the nested class [...] follows the usual rules for ordinary +character classes, in which parentheses are not metacharacters, and character +literals and ranges are permitted. +

    +

    +Character literals and ranges may not appear outside a nested ordinary +character class because they are not atoms in the extended syntax. The extended +syntax does not introduce any additional escape sequences, so (?[\y]) is an +unknown escape, as it would be in [\y]. +

    +

    +In the extended syntax, ^ does not negate a class (except within an +ordinary class nested inside an extended class); it is instead a binary +operator. +

    +

    +The binary operators are "&" (intersection), "|" or "+" (union), "-" +(subtraction) and "^" (symmetric difference). These are left-associative and +"&" has higher (tighter) precedence, while the others have equal lower +precedence. The one prefix unary operator is "!" (complement), with highest +precedence. +

    +
    UTS#18 EXTENDED CHARACTER CLASSES
    +

    +The PCRE2_ALT_EXTENDED_CLASS option enables an alternative to Perl's (?[...]) +syntax, allowing instead extended class behaviour inside ordinary [...] +character classes. This altered syntax for [...] classes is loosely described +by the Unicode standard UTS#18. The PCRE2_ALT_EXTENDED_CLASS option does not +prevent use of (?[...]) classes; it just changes the meaning of all +[...] classes that are not nested inside a Perl (?[...]) class. +

    +

    +Firstly, in ordinary Perl [...] syntax, an expression such as "[a[]" is a +character class with two literal characters "a" and "[", but in UTS#18 extended +classes the "[" character becomes an additional metacharacter within classes, +denoting the start of a nested class, so a literal "[" must be escaped as "\[". +

    +

    +Secondly, within the UTS#18 extended syntax, there are operators "||", "&&", +"--" and "~~" which denote character class union, intersection, subtraction, +and symmetric difference respectively. In standard Perl syntax, these would +simply be needlessly-repeated literals (except for "--" which could be the +start or end of a range). In UTS#18 extended classes these operators can be used +in constructs such as [\p{L}--[QW]] for "Unicode letters, other than Q and W". +A literal "-" at the start or end of a range must be escaped, so while "[--1]" +in Perl syntax is the range from hyphen to "1", it must be escaped as "[\--1]" +in UTS#18 extended classes. +

    +

    +Unlike Perl's (?[...]) extended classes, the PCRE2_EXTENDED_MORE option to +ignore space and tab characters is not automatically enabled for UTS#18 +extended classes, but it is honoured if set. +

    +

    +Extended UTS#18 classes can be nested, and nested classes are themselves +extended classes (unlike Perl, where nested classes must be simple classes). +For example, [\p{L}&&[\p{Thai}||\p{Greek}]] matches any letter that is in +the Thai or Greek scripts. Note that this means that no special grouping +characters (such as the parentheses used in Perl's (?[...]) class syntax) are +needed. +

    +

    +Individual class items (literal characters, literal ranges, properties such as +\d or \p{...}, and nested classes) can be combined by juxtaposition or by an +operator. Juxtaposition is the implicit union operator, and binds more tightly +than any explicit operator. Thus a sequence of literals and/or ranges behaves +as if it is enclosed in square brackets. For example, [A-Z0-9&&[^E8]] is the +same as [[A-Z0-9]&&[^E8]], which matches any upper case alphanumeric character +except "E" or "8". +

    +

    +Precedence between the explicit operators is not defined, so mixing operators +is a syntax error. For example, [A&&B--C] is an error, but [A&&[B--C]] is +valid.

    -
    POSIX CHARACTER CLASSES
    +

    +This is an emerging syntax which is being adopted gradually across the regex +ecosystem: for example JavaScript adopted the "/v" flag in ECMAScript 2024; +Python's "re" module reserves the syntax for future use with a FutureWarning +for unescaped use of "[" as a literal within character classes. Due to UTS#18 +providing insufficient guidance, engines interpret the syntax differently. +Rust's "regex" crate and Python's "regex" PyPI module both implement UTS#18 +extended classes, but with slight incompatibilities ([A||B&&C] is parsed as +[A||[B&&C]] in Python's "regex" but as [[A||B]&&C] in Rust's "regex"). +

    +

    +PCRE2's syntax adds syntax restrictions similar to ECMAScript's /v flag, so +that all the UTS#18 extended classes accepted as valid by PCRE2 have the +property that they are interpreted either with the same behaviour, or as +invalid, by all other major engines. Please file an issue if you are aware of +cross-engine differences in behaviour between PCRE2 and another major engine. +

    +
    POSIX CHARACTER CLASSES

    Perl supports the POSIX notation for character classes. This uses names enclosed by [: and :] within the enclosing square brackets. PCRE2 also supports -this notation. For example, +this notation, in both ordinary and extended classes. For example,

       [01[:alpha:]%]
     
    @@ -1584,7 +1748,7 @@ property. [:xdigit:] In addition to the ASCII hexadecimal digits, this also matches the "fullwidth" versions of those characters, whose Unicode code points start at U+FF10. This -is a change that was made in PCRE release 10.43 for Perl compatibility. +is a change that was made in PCRE2 release 10.43 for Perl compatibility.

    The other POSIX classes are unchanged by PCRE2_UCP, and match only characters @@ -1597,8 +1761,8 @@ just [:digit:] and [:xdigit:]. Within a pattern, this can be set and unset by (?aT) and (?-aT). The PCRE2_EXTRA_ASCII_POSIX option disables UCP processing for all POSIX classes, including [:digit:] and [:xdigit:]. Within a pattern, (?aP) and (?-aP) set and unset both these options for consistency. -

    -
    COMPATIBILITY FEATURE FOR WORD BOUNDARIES
    +

    +
    COMPATIBILITY FEATURE FOR WORD BOUNDARIES

    In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of @@ -1619,7 +1783,7 @@ used above in order to give exactly the POSIX behaviour. Note also that the PCRE2_UCP option changes the meaning of \w (and therefore \b) by default, so it also affects these POSIX sequences.

    -
    VERTICAL BAR
    +
    VERTICAL BAR

    Vertical bar characters are used to separate alternative patterns. For example, the pattern @@ -1634,7 +1798,7 @@ that succeeds is used. If the alternatives are within a group "succeeds" means matching the rest of the main pattern as well as the alternative in the group.

    -
    INTERNAL OPTION SETTING
    +
    INTERNAL OPTION SETTING

    The settings of several options can be changed within a pattern by a sequence of letters enclosed between "(?" and ")". The following are Perl-compatible, @@ -1732,7 +1896,7 @@ PCRE2_UTF and PCRE2_UCP options, respectively. However, the application can set the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, which lock out the use of the (*UTF) and (*UCP) sequences.

    -
    GROUPS
    +
    GROUPS

    Groups are delimited by parentheses (round brackets), which can be nested. Turning part of a pattern into a group does two things: @@ -1788,7 +1952,7 @@ from left to right, and options are not reset until the end of the group is reached, an option setting in one branch does affect subsequent branches, so the above patterns match "SUNDAY" as well as "Saturday".

    -
    DUPLICATE GROUP NUMBERS
    +
    DUPLICATE GROUP NUMBERS

    Perl 5.10 introduced a feature whereby each alternative in a group uses the same numbers for its capturing parentheses. Such a group starts with (?| and is @@ -1834,7 +1998,7 @@ true if any group with that number has matched. An alternative approach to using this "branch reset" feature is to use duplicate named groups, as described in the next section.

    -
    NAMED CAPTURE GROUPS
    +
    NAMED CAPTURE GROUPS

    Identifying capture groups by number is simple, but it can be very hard to keep track of the numbers in complicated patterns. Furthermore, if an expression is @@ -1954,7 +2118,7 @@ capture groups, see the pcre2api documentation.

    -
    REPETITION
    +
    REPETITION

    Repetition is specified by quantifiers, which may follow any one of these items: @@ -2118,8 +2282,9 @@ one succeeds. Consider this pattern: (?>.*?a)b It matches "ab" in the subject "aab". The use of the backtracking control verbs -(*PRUNE) and (*SKIP) also disable this optimization, and there is an option, -PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly. +(*PRUNE) and (*SKIP) also disable this optimization. To do so explicitly, +either pass the compile option PCRE2_NO_DOTSTAR_ANCHOR, or call +pcre2_set_optimize() with a PCRE2_DOTSTAR_ANCHOR_OFF directive.

    When a capture group is repeated, the value captured is the substring that @@ -2135,7 +2300,7 @@ captured values may have been set in previous iterations. For example, after matches "aba" the value of the second captured substring is "b".

    -
    ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
    +
    ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS

    With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") repetition, failure of what follows normally causes the repeated item to be @@ -2216,8 +2381,9 @@ package, and PCRE1 copied it from there. It found its way into Perl at release PCRE2 has an optimization that automatically "possessifies" certain simple pattern constructs. For example, the sequence A+B is treated as A++B because there is no point in backtracking into a sequence of A's when B must follow. -This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting -the pattern with (*NO_AUTO_POSSESS). +This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, by calling +pcre2_set_optimize() with a PCRE2_AUTO_POSSESS_OFF directive, or by +starting the pattern with (*NO_AUTO_POSSESS).

    When a pattern contains an unlimited repeat inside a group that can itself be @@ -2245,7 +2411,7 @@ an atomic group, like this: sequences of non-digits cannot be broken, and failure happens quickly.

    -
    BACKREFERENCES
    +
    BACKREFERENCES

    Outside a character class, a backslash followed by a digit greater than 0 (and possibly further digits) is a backreference to a capture group earlier (that @@ -2383,23 +2549,32 @@ cause the group that they reference to be treated as an This restriction no longer applies, and backtracking into such groups can occur as normal.

    -
    ASSERTIONS
    +
    ASSERTIONS

    -An assertion is a test on the characters following or preceding the current -matching point that does not consume any characters. The simple assertions -coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described +An assertion is a test that does not consume any characters. The test must +succeed for the match to continue. The simple assertions coded as \b, \B, +\A, \G, \Z, \z, ^ and $ are described above.

    -More complicated assertions are coded as parenthesized groups. There are two -kinds: those that look ahead of the current position in the subject string, and -those that look behind it, and in each case an assertion may be positive (must -match for the assertion to be true) or negative (must not match for the -assertion to be true). An assertion group is matched in the normal way, -and if it is true, matching continues after it, but with the matching position +More complicated assertions are coded as parenthesized groups. If matching such +a group succeeds, matching continues after it, but with the matching position in the subject string reset to what it was before the assertion was processed.

    +A special kind of assertion, called a "scan substring" assertion, matches a +subpattern against a previously captured substring. This is described in the +section entitled +"Scan substring assertions" +below. It is a PCRE2 extension, not compatible with Perl. +

    +

    +The other group-based assertions are of two kinds: those that look ahead of the +current position in the subject string, and those that look behind it, and in +each case an assertion may be positive (must match for the assertion to be +true) or negative (must not match for the assertion to be true). +

    +

    The Perl-compatible lookaround assertions are atomic. If an assertion is true, but there is a subsequent matching failure, there is no backtracking into the assertion. However, there are some cases where non-atomic assertions can be @@ -2624,7 +2799,7 @@ preceded by "foo", while is another pattern that matches "foo" preceded by three digits and any three characters that are not "999".

    -
    NON-ATOMIC ASSERTIONS
    +
    NON-ATOMIC ASSERTIONS

    Traditional lookaround assertions are atomic. That is, if an assertion is true, but there is a subsequent matching failure, there is no backtracking into the @@ -2683,8 +2858,67 @@ contain any control verbs such as (*ACCEPT). (This may change in future). Note that assertions that appear as conditions for conditional groups (see below) must be atomic. +

    +
    SCAN SUBSTRING ASSERTIONS
    +

    +A special kind of assertion, not compatible with Perl, makes it possible to +check the contents of a captured substring by matching it with a subpattern. +Because this involves capturing, this feature is not supported by +pcre2_dfa_match(). +

    +

    +A scan substring assertion starts with the sequence (*scan_substring: or +(*scs: which is followed by a list of substring numbers (absolute or relative) +and/or substring names enclosed in single quotes or angle brackets, all within +parentheses. The rest of the item is the subpattern that is applied to the +substring, as shown in these examples: +

    +  (*scan_substring:(1)...)
    +  (*scs:(-2)...)
    +  (*scs:('AB')...)
    +  (*scs:(1,'AB',-2)...)
    +
    +The list of groups is checked in the order they are given, and it is the +contents of the first one that is found to be set that are scanned. When +PCRE2_DUPNAMES is set and there are ambiguous group names, all groups with the +same name are checked in numerical order. A scan substring assertion fails if +none of the groups it references have been set.

    -
    SCRIPT RUNS
    +

    +The pattern match on the substring is always anchored, that is, it must match +from the start of the substring. There is no "bumpalong" if it does not match +at the start. The end of the subject is temporarily reset to be the end of the +substring, so \Z, \z, and $ will match there. However, the start of the +subject is not reset. This means that ^ matches only if the substring is +actually at the start of the main subject, but it also means that lookbehind +assertions into what precedes the substring are possible. +

    +

    +Here is a very simple example: find a word that contains the rare (in English) +sequence of letters "rh" not at the start: +

    +  \b(\w++)(*scs:(1).+rh)
    +
    +The first group captures a word which is then scanned by the second group. +This example does not actually need this heavyweight feature; the same match +can be achieved with: +
    +  \b\w+?rh\w*\b
    +
    +When things are more complicated, however, scanning a captured substring can be +a useful way to describe the required match. For example, there is a rather +complicated pattern in the PCRE2 test data that checks an entire subject string +for a palindrome, that is, the sequence of letters is the same in both +directions. Suppose you want to search for individual words of two or more +characters such as "level" that are palindromes: +
    +  (\b\w{2,}+\b)(*scs:(1)...palindrome-matching-pattern...)
    +
    +Within a substring scanning subpattern, references to other groups work as +normal. Capturing groups may appear, and will retain their values during +ongoing matching if the assertion succeeds. +

    +
    SCRIPT RUNS

    In concept, a script run is a sequence of characters that are all from the same Unicode script such as Latin or Greek. However, because some scripts are @@ -2746,7 +2980,7 @@ parentheses. should not be used within a script run group, because it causes an immediate exit from the group, bypassing the script run checking.

    -
    CONDITIONAL GROUPS
    +
    CONDITIONAL GROUPS

    It is possible to cause the matching process to obey a pattern fragment conditionally or to choose between two alternative fragments, depending on @@ -2947,13 +3181,13 @@ positive and negative assertions, because matching always continues after the assertion, whether it succeeds or fails. (Compare non-conditional assertions, for which captures are retained only for positive assertions that succeed.)

    -
    COMMENTS
    +
    COMMENTS

    There are two ways of including comments in patterns that are processed by PCRE2. In both cases, the start of the comment must not be in a character class, nor in the middle of any other sequence of related characters such as -(?: or a group name or number. The characters that make up a comment play -no part in the pattern matching. +(?: or a group name or number or a Unicode property name. The characters that +make up a comment play no part in the pattern matching.

    The sequence (?# marks the start of a comment that continues up to the next @@ -2977,7 +3211,7 @@ a newline in the pattern. The sequence \n is still literal at this stage, so it does not terminate the comment. Only an actual character with the code value 0x0a (the default newline) does so.

    -
    RECURSIVE PATTERNS
    +
    RECURSIVE PATTERNS

    Consider the problem of matching a string in parentheses, allowing for unlimited nested parentheses. Without the use of recursion, the best that can @@ -3165,7 +3399,7 @@ alternative matches "a" and then recurses. In the recursion, \1 does now match "b" and so the whole match succeeds. This match used to fail in Perl, but in later versions (I tried 5.024) it now works.

    -
    GROUPS AS SUBROUTINES
    +
    GROUPS AS SUBROUTINES

    If the syntax for a recursive group call (either by number or by name) is used outside the parentheses to which it refers, it operates a bit like a subroutine @@ -3213,7 +3447,7 @@ in groups when called as subroutines is described in the section entitled "Backtracking verbs in subroutines" below.

    -
    ONIGURUMA SUBROUTINE SYNTAX
    +
    ONIGURUMA SUBROUTINE SYNTAX

    For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or a number enclosed either in angle brackets or single quotes, is an alternative @@ -3231,7 +3465,7 @@ plus or a minus sign it is taken as a relative reference. For example: Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not synonymous. The former is a backreference; the latter is a subroutine call.

    -
    CALLOUTS
    +
    CALLOUTS

    Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl code to be obeyed in the middle of matching a regular expression. This makes it @@ -3244,7 +3478,9 @@ code. The feature is called "callout". The caller of PCRE2 provides an external function by putting its entry point in a match context using the function pcre2_set_callout(), and then passing that context to pcre2_match() or pcre2_dfa_match(). If no match context is passed, or if the callout -entry point is set to NULL, callouts are disabled. +entry point is set to NULL, callout points will be passed over silently during +matching. To disallow callouts in the pattern syntax, you may use the +PCRE2_EXTRA_NEVER_CALLOUT option.

    Within a regular expression, (?C<arg>) indicates a point at which the external @@ -3307,7 +3543,7 @@ example: The doubling is removed before the string is passed to the callout function.

    -
    BACKTRACKING CONTROL
    +
    BACKTRACKING CONTROL

    There are a number of special "Backtracking Control Verbs" (to use Perl's terminology) that modify the behaviour of backtracking during matching. They @@ -3347,8 +3583,8 @@ not there. Any number of these verbs may occur in a pattern. Except for

    Since these verbs are specifically related to backtracking, most of them can be used only when the pattern is to be matched using the traditional matching -function, because that uses a backtracking algorithm. With the exception of -(*FAIL), which behaves like a failing negative assertion, the backtracking +function or JIT, because they use backtracking algorithms. With the exception +of (*FAIL), which behaves like a failing negative assertion, the backtracking control verbs cause an error if encountered by the DFA matching function.

    @@ -3369,7 +3605,8 @@ minimum length of matching subject, or that a particular character must be present. When one of these optimizations bypasses the running of a match, any included backtracking verbs will not, of course, be processed. You can suppress the start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option -when calling pcre2_compile(), or by starting the pattern with +when calling pcre2_compile(), by calling pcre2_set_optimize() with a +PCRE2_START_OPTIMIZE_OFF directive, or by starting the pattern with (*NO_START_OPT). There is more discussion of this option in the section entitled "Compiling a pattern" @@ -3502,7 +3739,8 @@ attempts starting at "P" and then with an empty string do not get as far as the

    If you are interested in (*MARK) values after failed matches, you should -probably set the PCRE2_NO_START_OPTIMIZE option +probably either set the PCRE2_NO_START_OPTIMIZE option or call +pcre2_set_optimize() with a PCRE2_START_OPTIMIZE_OFF directive (see above) to ensure that the match is always attempted.

    @@ -3514,9 +3752,9 @@ The following verbs do nothing when they are encountered. Matching continues with what follows, but if there is a subsequent match failure, causing a backtrack to the verb, a failure is forced. That is, backtracking cannot pass to the left of the verb. However, when one of these verbs appears inside an -atomic group or in a lookaround assertion that is true, its effect is confined -to that group, because once the group has been matched, there is never any -backtracking into it. Backtracking from beyond an assertion or an atomic group +atomic group or in an atomic lookaround assertion that is true, its effect is +confined to that group, because once the group has been matched, there is never +any backtracking into it. Backtracking from beyond an atomic assertion or group ignores the entire group, and seeks a preceding backtracking point.

    @@ -3782,9 +4020,11 @@ into the assertion. Note in particular that a (*MARK) name that is set in an assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern.

    -PCRE2 now supports non-atomic positive assertions, as described in the section -entitled +PCRE2 now supports non-atomic positive assertions and also "scan substring" +assertions, as described in the sections entitled "Non-atomic assertions" +and +"Scan substring assertions" above. These assertions must be standalone (not used as conditions). They are not Perl-compatible. For these assertions, a later backtrack does jump back into the assertion, and therefore verbs such as (*COMMIT) can be triggered by @@ -3793,7 +4033,8 @@ backtracks from later in the pattern.

    The effect of (*THEN) is not allowed to escape beyond an assertion. If there are no more branches to try, (*THEN) causes a positive assertion to be false, -and a negative assertion to be true. +and a negative assertion to be true. This behaviour differs from Perl when the +assertion has only one branch.

    The other backtracking verbs are not treated specially if they appear in a @@ -3829,13 +4070,57 @@ then a backtrack at the outer level. enclosing group that has alternatives (its normal behaviour). However, if there is no such group within the subroutine's group, the subroutine match fails and there is a backtrack at the outer level. +

    +
    EBCDIC ENVIRONMENTS
    +

    +Differences in the way PCRE2 behaves when it is running in an EBCDIC environment +are covered in this section. +

    +
    +Escape sequences +
    +

    +When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. \a, \e, +\f, \n, \r, and \t generate the appropriate EBCDIC code values. The \c +escape is processed as specified for Perl in the perlebcdic document. The +only characters that are allowed after \c are A-Z, a-z, or one of @, [, \, ], +^, _, or ?. Any other character provokes a compile-time error. The sequence +\c@ encodes character code 0; after \c the letters (in either case) encode +characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 +(hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F). +

    +

    +Thus, apart from \c?, these escapes generate the same character code values as +they do in an ASCII or Unicode environment, though the meanings of the values +mostly differ. For example, \cG always generates code value 7, which is BEL in +ASCII but DEL in EBCDIC. +

    +

    +The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but +because 127 is not a control character in EBCDIC, Perl makes it generate the +APC character. Unfortunately, there are several variants of EBCDIC. In most of +them the APC character has the value 255 (hex FF), but in the one Perl calls +POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC +values, PCRE2 makes \c? generate 95; otherwise it generates 255. +

    +
    +Character classes +
    +

    +In character classes there is a special case in EBCDIC environments for ranges +whose end points are both specified as literal letters in the same case. For +compatibility with Perl, EBCDIC code points within the range that are not +letters are omitted. For example, [h-k] matches only four characters, even +though the EBCDIC codes for h and k are 0x88 and 0x92, a range of 11 code +points. However, if the range is specified numerically, for example, +[\x88-\x92] or [h-\x92], all code points are included.

    -
    SEE ALSO
    +
    SEE ALSO

    pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3), pcre2(3).

    -
    AUTHOR
    +
    AUTHOR

    Philip Hazel
    @@ -3844,9 +4129,9 @@ Retired from University Computing Service Cambridge, England.

    -
    REVISION
    +
    REVISION

    -Last updated: 04 June 2024 +Last updated: 27 November 2024
    Copyright © 1997-2024 University of Cambridge.
    diff --git a/doc/html/pcre2perform.html b/doc/html/pcre2perform.html index 55fdf20..b595119 100644 --- a/doc/html/pcre2perform.html +++ b/doc/html/pcre2perform.html @@ -271,7 +271,7 @@ Cambridge, England.


    REVISION

    -Last updated: 27 July 2022 +Last updated: 06 December 2022
    Copyright © 1997-2022 University of Cambridge.
    diff --git a/doc/html/pcre2posix.html b/doc/html/pcre2posix.html index 6e7abd9..bc60c3b 100644 --- a/doc/html/pcre2posix.html +++ b/doc/html/pcre2posix.html @@ -171,7 +171,7 @@ REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard. When a pattern that is compiled with this flag is passed to pcre2_regexec() for matching, the nmatch and pmatch arguments -are ignored, and no captured strings are returned. Versions of the PCRE library +are ignored, and no captured strings are returned. Versions of the PCRE2 library prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens because it disables the use of backreferences.

    @@ -370,7 +370,7 @@ Cambridge, England.
     


    REVISION

    -Last updated: 19 January 2024 +Last updated: 27 November 2024
    Copyright © 1997-2024 University of Cambridge.
    diff --git a/doc/html/pcre2sample.html b/doc/html/pcre2sample.html index 345df03..0903f04 100644 --- a/doc/html/pcre2sample.html +++ b/doc/html/pcre2sample.html @@ -101,7 +101,7 @@ Cambridge, England. REVISION

    -Last updated: 02 February 2016 +Last updated: 14 November 2023
    Copyright © 1997-2016 University of Cambridge.
    diff --git a/doc/html/pcre2serialize.html b/doc/html/pcre2serialize.html index 19418a8..d189bde 100644 --- a/doc/html/pcre2serialize.html +++ b/doc/html/pcre2serialize.html @@ -203,7 +203,7 @@ Cambridge, England.


    REVISION

    -Last updated: 27 June 2018 +Last updated: 19 January 2024
    Copyright © 1997-2018 University of Cambridge.
    diff --git a/doc/html/pcre2syntax.html b/doc/html/pcre2syntax.html index 1c0ccb0..46da3d7 100644 --- a/doc/html/pcre2syntax.html +++ b/doc/html/pcre2syntax.html @@ -24,34 +24,41 @@ please consult the man page, in case the conversion went wrong.

  • SCRIPT MATCHING WITH \p AND \P
  • THE BIDI_CLASS PROPERTY FOR \p AND \P
  • CHARACTER CLASSES -
  • QUANTIFIERS -
  • ANCHORS AND SIMPLE ASSERTIONS -
  • REPORTED MATCH POINT SETTING -
  • ALTERNATION -
  • CAPTURING -
  • ATOMIC GROUPS -
  • COMMENT -
  • OPTION SETTING -
  • NEWLINE CONVENTION -
  • WHAT \R MATCHES -
  • LOOKAHEAD AND LOOKBEHIND ASSERTIONS -
  • NON-ATOMIC LOOKAROUND ASSERTIONS -
  • SCRIPT RUNS -
  • BACKREFERENCES -
  • SUBROUTINE REFERENCES (POSSIBLY RECURSIVE) -
  • CONDITIONAL PATTERNS -
  • BACKTRACKING CONTROL -
  • CALLOUTS -
  • SEE ALSO -
  • AUTHOR -
  • REVISION +
  • PERL EXTENDED CHARACTER CLASSES +
  • QUANTIFIERS +
  • ANCHORS AND SIMPLE ASSERTIONS +
  • REPORTED MATCH POINT SETTING +
  • ALTERNATION +
  • CAPTURING +
  • ATOMIC GROUPS +
  • COMMENT +
  • OPTION SETTING +
  • NEWLINE CONVENTION +
  • WHAT \R MATCHES +
  • LOOKAHEAD AND LOOKBEHIND ASSERTIONS +
  • NON-ATOMIC LOOKAROUND ASSERTIONS +
  • SUBSTRING SCAN ASSERTION +
  • SCRIPT RUNS +
  • BACKREFERENCES +
  • SUBROUTINE REFERENCES (POSSIBLY RECURSIVE) +
  • CONDITIONAL PATTERNS +
  • BACKTRACKING CONTROL +
  • CALLOUTS +
  • REPLACEMENT STRINGS +
  • SEE ALSO +
  • AUTHOR +
  • REVISION
    PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY

    -The full syntax and semantics of the regular expressions that are supported by -PCRE2 are described in the +The full syntax and semantics of the regular expression patterns that are +supported by PCRE2 are described in the pcre2pattern -documentation. This document contains a quick-reference summary of the syntax. +documentation. This document contains a quick-reference summary of the pattern +syntax followed by the syntax of replacement strings in the substitution function. +The full description of the latter is in the +pcre2api +documentation.


    QUOTING

    @@ -60,7 +67,10 @@ documentation. This document contains a quick-reference summary of the syntax. \Q...\E treat enclosed characters as literal

  • Note that white space inside \Q...\E is always treated as literal, even if -PCRE2_EXTENDED is set, causing most other white space to be ignored. +PCRE2_EXTENDED is set, causing most other white space to be ignored. Note also +that PCRE2's handling of \Q...\E has some differences from Perl's. See the +pcre2pattern +documentation for details.


    BRACED ITEMS

    @@ -91,6 +101,11 @@ sequence causes an error. \xhh character with hex code hh \x{hh..} character with hex code hh.. +\N{U+hh..} is synonymous with \x{hh..} but is not supported in environments +that use EBCDIC code (mainly IBM mainframes). Note that \N not followed by an +opening curly bracket has a different meaning (see below). +

    +

    If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the following are also recognized:

    @@ -98,7 +113,7 @@ following are also recognized:
       \uhhhh     character with hex code hhhh
       \u{hh..}   character with hex code hh.. but only for EXTRA_ALT_BSUX
     
    -When \x is not followed by {, from zero to two hexadecimal digits are read, +When \x is not followed by {, one or two hexadecimal digits are read, but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be recognized as a hexadecimal escape; otherwise it matches a literal "x". Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits @@ -112,9 +127,7 @@ a non-zero digit is complicated; for details see the section in the pcre2pattern documentation, where details of escape processing in EBCDIC environments are -also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not -supported in EBCDIC environments. Note that \N not followed by an opening -curly bracket has a different meaning (see below). +also given.


    CHARACTER TYPES

    @@ -154,8 +167,9 @@ sequences to matching only ASCII characters.

    Property descriptions in \p and \P are matched caselessly; hyphens, -underscores, and white space are ignored, in accordance with Unicode's "loose -matching" rules. +underscores, and ASCII white space characters are ignored, in accordance with +Unicode's "loose matching" rules. For example, \p{Bidi_Class=al} is the same +as \p{ bidi class = AL }.


    GENERAL CATEGORY PROPERTIES FOR \p and \P

    @@ -168,13 +182,13 @@ matching" rules. Cs Surrogate L Letter + Lc Cased letter, the union of Ll, Lu, and Lt + L& Synonym of Lc Ll Lower case letter Lm Modifier letter Lo Other letter Lt Title case letter Lu Upper case letter - Lc Ll, Lu, or Lt - L& Ll, Lu, or Lt M Mark Mc Spacing mark @@ -205,7 +219,9 @@ matching" rules. Zl Line separator Zp Paragraph separator Zs Space separator - + +From release 10.45, when caseless matching is set, Ll, Lu, and Lt are all +equivalent to Lc.


    PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P

    @@ -268,7 +284,7 @@ The recognized classes are: RLI right-to-left isolate RLO right-to-left override S segment separator - WS which space + WS white space


    CHARACTER CLASSES
    @@ -299,7 +315,45 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default, but some of them use Unicode properties if PCRE2_UCP is set. You can use \Q...\E inside a character class.

    -
    QUANTIFIERS
    +

    +When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes may be +used, allowing nested character classes, combined using set operators. +

    +  [x&&[^y]]   UTS#18 extended character class
    +
    +  x||y        set union (OR)
    +  x&&y        set intersection (AND)
    +  x--y        set difference (AND NOT)
    +  x~~y        set symmetric difference (XOR)
    +
    +
    +

    +
    PERL EXTENDED CHARACTER CLASSES
    +

    +

    +  (?[...])                Perl extended character class
    +  (?[\p{Thai} & \p{Nd}])  operators; whitespace ignored
    +  (?[(x - y) & z])        parentheses for grouping
    +
    +  (?[ [^3] & \p{Nd} ])    [...] is a nested ordinary class
    +  (?[ [:alpha:] - [z] ])  POSIX set is allowed outside [...]
    +  (?[ \d - [3] ])         backslash-escaped set is allowed outside [...]
    +  (?[ !\n & [:ascii:] ])  backslash-escaped character is allowed outside [...]
    +                      all other characters or ranges must be enclosed in [...]
    +
    +  x|y, x+y                set union (OR)
    +  x&y                     set intersection (AND)
    +  x-y                     set difference (AND NOT)
    +  x^y                     set symmetric difference (XOR)
    +  !x                      set complement (NOT)
    +
    +Inside a Perl extended character class, [...] switches mode to be interpreted +as an ordinary character class. Outside of a nested [...], the only items +permitted are backslash-escapes, POSIX sets, operators, and parentheses. Inside +a nested ordinary class, ^ has its usual meaning (inverts the class when used +as the first character); outside of a nested class, ^ is the XOR operator. +

    +
    QUANTIFIERS

       ?           0 or 1, greedy
    @@ -323,7 +377,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
       {,m}?       zero up to m, lazy
     

    -
    ANCHORS AND SIMPLE ASSERTIONS
    +
    ANCHORS AND SIMPLE ASSERTIONS

       \b          word boundary
    @@ -341,7 +395,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
       \G          first matching position in subject
     

    -
    REPORTED MATCH POINT SETTING
    +
    REPORTED MATCH POINT SETTING

       \K          set reported start of match
    @@ -351,13 +405,13 @@ for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
     option is set, the previous behaviour is re-enabled. When this option is set,
     \K is honoured in positive assertions, but ignored in negative ones.
     

    -
    ALTERNATION
    +
    ALTERNATION

       expr|expr|expr...
     

    -
    CAPTURING
    +
    CAPTURING

       (...)           capture group
    @@ -372,20 +426,20 @@ In non-UTF modes, names may contain underscores and ASCII letters and digits;
     in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
     both cases, a name must not start with a digit.
     

    -
    ATOMIC GROUPS
    +
    ATOMIC GROUPS

       (?>...)         atomic non-capture group
       (*atomic:...)   atomic non-capture group
     

    -
    COMMENT
    +
    COMMENT

       (?#....)        comment (not nestable)
     

    -
    OPTION SETTING
    +
    OPTION SETTING

    Changes of these options within a group are automatically cancelled at the end of the group. @@ -409,7 +463,7 @@ of the group. (?^) unset imnrsx options

    (?aP) implies (?aT) as well, though this has no additional effect. However, it -means that (?-aP) is really (?-PT) which disables all ASCII restrictions for +means that (?-aP) also implies (?-aT) and disables all ASCII restrictions for POSIX classes.

    @@ -421,20 +475,22 @@ example (?i:...).

    The following are recognized only at the very start of a pattern or after one -of the newline or \R options with similar syntax. More than one of them may -appear. For the first three, d is a decimal number. -

    -  (*LIMIT_DEPTH=d) set the backtracking limit to d
    -  (*LIMIT_HEAP=d)  set the heap size limit to d * 1024 bytes
    -  (*LIMIT_MATCH=d) set the match limit to d
    -  (*NOTEMPTY)      set PCRE2_NOTEMPTY when matching
    -  (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
    -  (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
    +of the newline or \R sequences or options with similar syntax. More than one
    +of them may appear. For the first three, d is a decimal number.
    +
    +  (*LIMIT_DEPTH=d)     set the backtracking limit to d
    +  (*LIMIT_HEAP=d)      set the heap size limit to d * 1024 bytes
    +  (*LIMIT_MATCH=d)     set the match limit to d
    +  (*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching
    +  (*NOTEMPTY)          set PCRE2_NOTEMPTY when matching
    +  (*NOTEMPTY_ATSTART)  set PCRE2_NOTEMPTY_ATSTART when matching
    +  (*NO_AUTO_POSSESS)   no auto-possessification (PCRE2_NO_AUTO_POSSESS)
       (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR)
    -  (*NO_JIT)       disable JIT optimization
    -  (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
    -  (*UTF)          set appropriate UTF mode for the library in use
    -  (*UCP)          set PCRE2_UCP (use Unicode properties for \d etc)
    +  (*NO_JIT)            disable JIT optimization
    +  (*NO_START_OPT)      no start-match optimization (PCRE2_NO_START_OPTIMIZE)
    +  (*TURKISH_CASING)    set PCRE2_EXTRA_TURKISH_CASING when matching
    +  (*UTF)               set appropriate UTF mode for the library in use
    +  (*UCP)               set PCRE2_UCP (use Unicode properties for \d etc)
     
    Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of the limits set by the caller of pcre2_match() or pcre2_dfa_match(), @@ -442,7 +498,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The application can lock out the use of (*UTF) and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.

    -
    NEWLINE CONVENTION
    +
    NEWLINE CONVENTION

    These are recognized only at the very start of the pattern or after option settings with a similar syntax. @@ -455,7 +511,7 @@ settings with a similar syntax. (*NUL) the NUL character (binary zero)

    -
    WHAT \R MATCHES
    +
    WHAT \R MATCHES

    These are recognized only at the very start of the pattern or after option setting with a similar syntax. @@ -464,7 +520,7 @@ setting with a similar syntax. (*BSR_UNICODE) any Unicode newline sequence

    -
    LOOKAHEAD AND LOOKBEHIND ASSERTIONS
    +
    LOOKAHEAD AND LOOKBEHIND ASSERTIONS

       (?=...)                     )
    @@ -490,7 +546,7 @@ the maximum for each branch is limited to a value set by the caller of
     (ultimate default 255). If every branch matches a fixed number of characters,
     the limit for each branch is 65535 characters.
     

    -
    NON-ATOMIC LOOKAROUND ASSERTIONS
    +
    NON-ATOMIC LOOKAROUND ASSERTIONS

    These assertions are specific to PCRE2 and are not Perl-compatible.

    @@ -503,7 +559,24 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       (*non_atomic_positive_lookbehind:...)  )
     

    -
    SCRIPT RUNS
    +
    SUBSTRING SCAN ASSERTION
    +

    +This feature is not Perl-compatible. +

    +  (*scan_substring:(grouplist)...)  scan captured substring
    +  (*scs:(grouplist)...)             scan captured substring
    +
    +The comma-separated list may identify groups in any of the following ways: +
    +  n       absolute reference
    +  +n      relative reference
    +  -n      relative reference
    +  <name>  name
    +  'name'  name
    +
    +
    +

    +
    SCRIPT RUNS

       (*script_run:...)           ) script run, can be backtracked into
    @@ -513,7 +586,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       (*asr:...)                  )
     

    -
    BACKREFERENCES
    +
    BACKREFERENCES

       \n              reference by number (can be ambiguous)
    @@ -530,7 +603,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       (?P=name)       reference by name (Python)
     

    -
    SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)
    +
    SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)

       (?R)            recurse whole pattern
    @@ -549,7 +622,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       \g'-n'          call subroutine by relative number (PCRE2 extension)
     

    -
    CONDITIONAL PATTERNS
    +
    CONDITIONAL PATTERNS

       (?(condition)yes-pattern)
    @@ -572,7 +645,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
     conditions or recursion tests. Such a condition is interpreted as a reference
     condition if the relevant named group exists.
     

    -
    BACKTRACKING CONTROL
    +
    BACKTRACKING CONTROL

    All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the name is mandatory, for the others it is optional. (*SKIP) changes its behaviour @@ -599,7 +672,7 @@ pattern is not anchored. The effect of one of these verbs in a group called as a subroutine is confined to the subroutine call.

    -
    CALLOUTS
    +
    CALLOUTS

       (?C)            callout (assumed number 0)
    @@ -610,12 +683,58 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
     start and the end), and the starting delimiter { matched with the ending
     delimiter }. To encode the ending delimiter within the string, double it.
     

    -
    SEE ALSO
    +
    REPLACEMENT STRINGS
    +

    +If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for +pcre2_substitute() is not interpreted. Otherwise, by default, the only +special character is the dollar character in one of the following forms: +

    +  $$                  insert a dollar character
    +  $n or ${n}          insert the contents of group n
    +  $<name>             insert the contents of named group
    +  $0 or $&            insert the entire matched substring
    +  $`                  insert the substring that precedes the match
    +  $'                  insert the substring that follows the match
    +  $_                  insert the entire input string
    +  $*MARK or ${*MARK}  insert a control verb name
    +
    +For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is set, +there is additional interpretation: +

    +

    +1. Backslash is an escape character, and the forms described in "ESCAPED +CHARACTERS" above are recognized. Also: +

    +  \Q...\E   can be used to suppress interpretation
    +  \l        force the next character to lower case
    +  \u        force the next character to upper case
    +  \L        force subsequent characters to lower case
    +  \U        force subsequent characters to upper case
    +  \u\L      force next character to upper case, then all lower
    +  \l\U      force next character to lower case, then all upper
    +  \E        end \L or \U case forcing
    +  \b        backspace character (note: as in character class in pattern)
    +  \v        vertical tab character (note: not the same as in a pattern)
    +
    +2. The Python form \g<n>, where the angle brackets are part of the syntax and +n is either a group name or a number, is recognized as an alternative way +of inserting the contents of a group, for example \g<3>. +

    +

    +3. Capture substitution supports the following additional forms: +

    +  ${n:-string}             default for unset group
    +  ${n:+string1:string2}    values for set/unset group
    +
    +The substitution strings themselves are expanded. Backslash can be used to +escape colons and closing curly brackets. +

    +
    SEE ALSO

    pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2(3).

    -
    AUTHOR
    +
    AUTHOR

    Philip Hazel
    @@ -624,11 +743,11 @@ Retired from University Computing Service Cambridge, England.

    -
    REVISION
    +
    REVISION

    -Last updated: 12 October 2023 +Last updated: 27 November 2024
    -Copyright © 1997-2023 University of Cambridge. +Copyright © 1997-2024 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html index 6cc3cc3..db9073f 100644 --- a/doc/html/pcre2test.html +++ b/doc/html/pcre2test.html @@ -105,8 +105,8 @@ Input for the 16-bit and 32-bit libraries

    When testing the 16-bit or 32-bit libraries, there is a need to be able to generate character code points greater than 255 in the strings that are passed -to the library. For subject lines, backslash escapes can be used. In addition, -when the utf modifier (see +to the library. For subject lines and some patterns, backslash escapes can be +used. In addition, when the utf modifier (see "Setting compilation options" below) is set, the pattern and any following subject lines are interpreted as UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate. @@ -125,9 +125,8 @@ UTF-8 (in its original definition) is not capable of encoding values greater than 0x7fffffff, but such values can be handled by the 32-bit library. When testing this library in non-UTF mode with utf8_input set, if any character is preceded by the byte 0xff (which is an invalid byte in UTF-8) -0x80000000 is added to the character's value. This is the only way of passing -such code points in a pattern string. For subject strings, using an escape -sequence is preferable. +0x80000000 is added to the character's value. For subject strings, using an +escape sequence is preferable.


    COMMAND LINE OPTIONS

    @@ -178,8 +177,8 @@ functionality is intended for use in scripts such as RunTest. The following options output the value and set the exit code as indicated:

       ebcdic-nl  the code for LF (= NL) in an EBCDIC environment:
    -               0x15 or 0x25
    -               0 if used in an ASCII environment
    +               either 0x15 or 0x25
    +               0 if used in an ASCII/Unicode environment
                    exit code is always 0
       linksize   the configured internal link size (2, 3, or 4)
                    exit code is set to the link size
    @@ -201,6 +200,16 @@ to the same value:
       pcre2-8      the 8-bit library was built
       unicode      Unicode support is available
     
    +Note that the availability of JIT support in the library does not guarantee +that it can actually be used because in some environments it is unable to +allocate executable memory. The option "jitusable" gives more detailed +information. It returns one of the following values: +
    +  0  JIT is available and usable
    +  1  JIT is available but cannot allocate executable memory
    +  2  JIT is not available
    +  3  Unexpected return from test call to pcre2_jit_compile()
    +
    If an unknown option is given, an error message is output; the exit code is 0.

    @@ -527,39 +536,48 @@ space is removed, and the line is scanned for backslash escapes, unless the subject_literal modifier was set for the pattern. The following provide a means of encoding non-printing characters in a visible way:

    -  \a         alarm (BEL, \x07)
    -  \b         backspace (\x08)
    -  \e         escape (\x27)
    -  \f         form feed (\x0c)
    -  \n         newline (\x0a)
    -  \r         carriage return (\x0d)
    -  \t         tab (\x09)
    -  \v         vertical tab (\x0b)
    -  \nnn       octal character (up to 3 octal digits); always
    -               a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
    -  \o{dd...}  octal character (any number of octal digits}
    -  \xhh       hexadecimal byte (up to 2 hex digits)
    -  \x{hh...}  hexadecimal character (any number of hex digits)
    +  \a          alarm (BEL, \x07)
    +  \b          backspace (\x08)
    +  \e          escape (\x1b)
    +  \f          form feed (\x0c)
    +  \n          newline (\x0a)
    +  \N{U+hh...} Unicode character (any number of hex digits)
    +  \r          carriage return (\x0d)
    +  \t          tab (\x09)
    +  \v          vertical tab (\x0b)
    +  \ddd        octal number (up to 3 octal digits); represents a single
    +                code point unless larger than 255 with the 8-bit library
    +  \o{dd...}   octal number (any number of octal digits) representing a
    +                character in UTF mode or a code point
    +  \xhh        hexadecimal byte (up to 2 hex digits)
    +  \x{hh...}   hexadecimal number (up to 8 hex digits) representing a
    +                character in UTF mode or a code point
     
    -The use of \x{hh...} is not dependent on the use of the utf modifier on -the pattern. It is recognized always. There may be any number of hexadecimal -digits inside the braces; invalid values provoke error messages. +Invoking \N{U+hh...} or \x{hh...} doesn't require the use of the utf +modifier on the pattern. It is always recognized. There may be any number of +hexadecimal digits inside the braces; invalid values provoke error messages, +but when using \N{U+hh...} with some invalid Unicode characters they will +be accepted with a warning instead.

    -Note that \xhh specifies one byte rather than one character in UTF-8 mode; -this makes it possible to construct invalid UTF-8 sequences for testing -purposes. On the other hand, \x{hh} is interpreted as a UTF-8 character in -UTF-8 mode, generating more than one byte if the value is greater than 127. -When testing the 8-bit library not in UTF-8 mode, \x{hh} generates one byte -for values less than 256, and causes an error for greater values. +Note that even in UTF-8 mode, \xhh (and depending on how large, \ddd) +describe one byte rather than one character; this makes it possible to +construct invalid UTF-8 sequences for testing purposes. On the other hand, +\x{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only generating +more than one byte if the value is greater than 127. To avoid the ambiguity +it is preferred to use \N{U+hh...} when describing characters. When testing +the 8-bit library not in UTF-8 mode, \x{hh} generates one byte for values +that could fit in it, and causes an error for greater values.

    -In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it -possible to construct invalid UTF-16 sequences for testing purposes. +When testing the 16-bit library, not in UTF-16 mode, all 4-digit \x{hhhh} +values are accepted. This makes it possible to construct invalid UTF-16 +sequences for testing purposes.

    -In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This makes it -possible to construct invalid UTF-32 sequences for testing purposes. +When testing the 32-bit library, not in UTF-32 mode, all 4 to 8-digit \x{...} +values are accepted. This makes it possible to construct invalid UTF-32 +sequences for testing purposes.

    There is a special backslash sequence that specifies replication of one or more @@ -625,6 +643,7 @@ for a description of the effects of these options. allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES alt_bsux set PCRE2_ALT_BSUX alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_extended_class set PCRE2_ALT_EXTENDED_CLASS alt_verbnames set PCRE2_ALT_VERBNAMES anchored set PCRE2_ANCHORED /a ascii_all set all ASCII options @@ -653,13 +672,17 @@ for a description of the effects of these options. match_word set PCRE2_EXTRA_MATCH_WORD /m multiline set PCRE2_MULTILINE never_backslash_c set PCRE2_NEVER_BACKSLASH_C + never_callout set PCRE2_EXTRA_NEVER_CALLOUT never_ucp set PCRE2_NEVER_UCP never_utf set PCRE2_NEVER_UTF /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE no_auto_possess set PCRE2_NO_AUTO_POSSESS + no_bs0 set PCRE2_EXTRA_NO_BS0 no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR no_start_optimize set PCRE2_NO_START_OPTIMIZE no_utf_check set PCRE2_NO_UTF_CHECK + python_octal set PCRE2_EXTRA_PYTHON_OCTAL + turkish_casing set PCRE2_EXTRA_TURKISH_CASING ucp set PCRE2_UCP ungreedy set PCRE2_UNGREEDY use_offset_limit set PCRE2_USE_OFFSET_LIMIT @@ -671,6 +694,23 @@ notation. Otherwise, those less than 0x100 are output in hex without the curly brackets. Setting utf in 16-bit or 32-bit mode also causes pattern and subject strings to be translated to UTF-16 or UTF-32, respectively, before being passed to library functions. +
    +
    +The following modifiers enable or disable performance optimizations by +calling pcre2_set_optimize() before invoking the regex compiler. +

    +      optimization_full      enable all optional optimizations
    +      optimization_none      disable all optional optimizations
    +      auto_possess           auto-possessify variable quantifiers
    +      auto_possess_off       don't auto-possessify variable quantifiers
    +      dotstar_anchor         anchor patterns starting with .*
    +      dotstar_anchor_off     don't anchor patterns starting with .*
    +      start_optimize         enable pre-scan of subject string
    +      start_optimize_off     disable pre-scan of subject string
    +
    +See the +pcre2_set_optimize +documentation for details on these optimizations.


    Setting compilation controls @@ -680,14 +720,15 @@ The following modifiers affect the compilation process or request information about the pattern. There are single-letter abbreviations for some that are heavily used in the test files.
    -      bsr=[anycrlf|unicode]     specify \R handling
       /B  bincode                   show binary code without lengths
    +      bsr=[anycrlf|unicode]     specify \R handling
           callout_info              show callout information
           convert=<options>         request foreign pattern conversion
           convert_glob_escape=c     set glob escape character
           convert_glob_separator=c  set glob separator character
           convert_length            set convert buffer length
           debug                     same as info,fullbincode
    +      expand                    expand repetition syntax in pattern
           framesize                 show matching frame size
           fullbincode               show binary code with lengths
       /I  info                      show info about compiled pattern
    @@ -709,6 +750,7 @@ heavily used in the test files.
           posix_nosub               use the POSIX API with REG_NOSUB
           push                      push compiled pattern onto the stack
           pushcopy                  push a copy onto the stack
    +      pushtablescopy            push a copy with tables onto the stack
           stackguard=<number>       test the stackguard feature
           subject_literal           treat all subject lines as literal
           tables=[0|1|2|3]          select internal tables
    @@ -1128,6 +1170,7 @@ process.
           replace=<string>            specify a replacement string
           startchar                   show starting character when relevant
           substitute_callout          use substitution callouts
    +      substitute_case_callout     use substitution case callouts
           substitute_extended         use PCRE2_SUBSTITUTE_EXTENDED
           substitute_literal          use PCRE2_SUBSTITUTE_LITERAL
           substitute_matched          use PCRE2_SUBSTITUTE_MATCHED
    @@ -1217,10 +1260,11 @@ Setting match options
     

    The following modifiers set options for pcre2_match() or pcre2_dfa_match(). See -pcreapi +pcre2api for a description of their effects.

           anchored                   set PCRE2_ANCHORED
    +      copy_matched_subject       set PCRE2_COPY_MATCHED_SUBJECT
           endanchored                set PCRE2_ENDANCHORED
           dfa_restart                set PCRE2_DFA_RESTART
           dfa_shortest               set PCRE2_DFA_SHORTEST
    @@ -1271,8 +1315,8 @@ pattern, but can be overridden by modifiers on the subject.
           aftertext                  show text after match
           allaftertext               show text after captures
           allcaptures                show all captures
    -      allvector                  show the entire ovector
           allusedtext                show all consulted text (non-JIT only)
    +      allvector                  show the entire ovector
           altglobal                  alternative global matching
           callout_capture            show captures at callout time
           callout_data=<n>           set a value to pass via callouts
    @@ -1306,7 +1350,8 @@ pattern, but can be overridden by modifiers on the subject.
           startchar                  show startchar when relevant
           startoffset=<n>            same as offset=<n>
           substitute_callout         use substitution callouts
    -      substitute_extedded        use PCRE2_SUBSTITUTE_EXTENDED
    +      substitute_case_callout    use substitution case callouts
    +      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
           substitute_literal         use PCRE2_SUBSTITUTE_LITERAL
           substitute_matched         use PCRE2_SUBSTITUTE_MATCHED
           substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
    @@ -1592,6 +1637,21 @@ If both are set for the same number, stop takes precedence. Only a single skip
     or stop is supported, which is sufficient for testing that the feature works.
     


    +Testing substitute case callouts +
    +

    +If the substitute_case_callout modifier is set, a substitution +case callout function is set up. The callout function is called for each +substituted chunk which is to be case-transformed. +

    +

    +The callout function passed is a fixed function with implementation for certain +behaviours: inputs which shrink when case-transformed; inputs which grow; inputs +with distinct upper/lower/titlecase forms. The characters which are not +special-cased for testing purposes are left unmodified, as if they are caseless +characters. +

    +
    Setting the JIT stack size

    @@ -2204,7 +2264,7 @@ Cambridge, England.


    REVISION

    -Last updated: 24 April 2024 +Last updated: 26 December 2024
    Copyright © 1997-2024 University of Cambridge.
    diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html index 6f0972e..5b42532 100644 --- a/doc/html/pcre2unicode.html +++ b/doc/html/pcre2unicode.html @@ -53,7 +53,7 @@ When PCRE2 is built with Unicode support, the escape sequences \p{..}, The Unicode properties that can be tested are a subset of those that Perl supports. Currently they are limited to the general category properties such as Lu for an upper case letter or Nd for a decimal number, the derived properties -Any and LC (synonym L&), the Unicode script names such as Arabic or Han, +Any and Lc (synonym L&), the Unicode script names such as Arabic or Han, Bidi_Class, Bidi_Control, and a few binary properties.

    @@ -157,6 +157,40 @@ Recognition of these non-ASCII characters as case-equivalent to their ASCII counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT option. When this is set, all characters in a case equivalence must either be ASCII or non-ASCII; there can be no mixing. +

    +    Without PCRE2_EXTRA_CASELESS_RESTRICT:
    +      'k' = 'K' = U+212A (Kelvin sign)
    +      's' = 'S' = U+017F (long S)
    +    With PCRE2_EXTRA_CASELESS_RESTRICT:
    +      'k' = 'K'
    +      U+212A (Kelvin sign)  only case-equivalent to itself
    +      's' = 'S'
    +      U+017F (long S)       only case-equivalent to itself
    +
    +

    +

    +One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. +

    +    Without PCRE2_EXTRA_TURKISH_CASING:
    +      'i' = 'I'
    +      U+0130 (capital I with dot above)  only case-equivalent to itself
    +      U+0131 (small dotless i)           only case-equivalent to itself
    +    With PCRE2_EXTRA_TURKISH_CASING:
    +      'i' = U+0130 (capital I with dot above)
    +      U+0131 (small dotless i) = 'I'
    +
    +

    +

    +It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and +PCRE2_EXTRA_TURKISH_CASING together. +

    +

    +From release 10.45 the Unicode letter properties Lu (upper case), Ll (lower +case), and Lt (title case) are all treated as Lc (cased letter) when caseless +matching is set by the PCRE2_CASELESS option or (?i) within the pattern.


    SCRIPT RUNS @@ -513,9 +547,9 @@ Cambridge, England. REVISION

    -Last updated: 12 October 2023 +Last updated: 27 November 2024
    -Copyright © 1997-2023 University of Cambridge. +Copyright © 1997-2024 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/doc/index.html.src b/doc/index.html.src deleted file mode 100644 index e4dc786..0000000 --- a/doc/index.html.src +++ /dev/null @@ -1,318 +0,0 @@ - - - -PCRE2 specification - - -

    Perl-compatible Regular Expressions (revised API: PCRE2)

    -

    -The HTML documentation for PCRE2 consists of a number of pages that are listed -below in alphabetical order. If you are new to PCRE2, please read the first one -first. -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    pcre2  Introductory page
    pcre2-config  Information about the installation configuration
    pcre2api  PCRE2's native API
    pcre2build  Building PCRE2
    pcre2callout  The callout facility
    pcre2compat  Compatibility with Perl
    pcre2convert  Experimental foreign pattern conversion functions
    pcre2demo  A demonstration C program that uses the PCRE2 library
    pcre2grep  The pcre2grep command
    pcre2jit  Discussion of the just-in-time optimization support
    pcre2limits  Details of size and other limits
    pcre2matching  Discussion of the two matching algorithms
    pcre2partial  Using PCRE2 for partial matching
    pcre2pattern  Specification of the regular expressions supported by PCRE2
    pcre2perform  Some comments on performance
    pcre2posix  The POSIX API to the PCRE2 8-bit library
    pcre2sample  Discussion of the pcre2demo program
    pcre2serialize  Serializing functions for saving precompiled patterns
    pcre2syntax  Syntax quick-reference summary
    pcre2test  The pcre2test command for testing PCRE2
    pcre2unicode  Discussion of Unicode and UTF-8/UTF-16/UTF-32 support
    - -

    -There are also individual pages that summarize the interface for each function -in the library. -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    pcre2_callout_enumerate  Enumerate callouts in a compiled pattern
    pcre2_code_copy  Copy a compiled pattern
    pcre2_code_copy_with_tables  Copy a compiled pattern and its character tables
    pcre2_code_free  Free a compiled pattern
    pcre2_compile  Compile a regular expression pattern
    pcre2_compile_context_copy  Copy a compile context
    pcre2_compile_context_create  Create a compile context
    pcre2_compile_context_free  Free a compile context
    pcre2_config  Show build-time configuration options
    pcre2_convert_context_copy  Copy a convert context
    pcre2_convert_context_create  Create a convert context
    pcre2_convert_context_free  Free a convert context
    pcre2_converted_pattern_free  Free converted foreign pattern
    pcre2_dfa_match  Match a compiled pattern to a subject string - (DFA algorithm; not Perl compatible)
    pcre2_general_context_copy  Copy a general context
    pcre2_general_context_create  Create a general context
    pcre2_general_context_free  Free a general context
    pcre2_get_error_message  Get textual error message for error number
    pcre2_get_mark  Get a (*MARK) name
    pcre2_get_match_data_size  Get the size of a match data block
    pcre2_get_ovector_count  Get the ovector count
    pcre2_get_ovector_pointer  Get a pointer to the ovector
    pcre2_get_startchar  Get the starting character offset
    pcre2_jit_compile  Process a compiled pattern with the JIT compiler
    pcre2_jit_free_unused_memory  Free unused JIT memory
    pcre2_jit_match  Fast path interface to JIT matching
    pcre2_jit_stack_assign  Assign stack for JIT matching
    pcre2_jit_stack_create  Create a stack for JIT matching
    pcre2_jit_stack_free  Free a JIT matching stack
    pcre2_maketables  Build character tables in current locale
    pcre2_maketables_free  Free character tables
    pcre2_match  Match a compiled pattern to a subject string - (Perl compatible)
    pcre2_match_context_copy  Copy a match context
    pcre2_match_context_create  Create a match context
    pcre2_match_context_free  Free a match context
    pcre2_match_data_create  Create a match data block
    pcre2_match_data_create_from_pattern  Create a match data block getting size from pattern
    pcre2_match_data_free  Free a match data block
    pcre2_pattern_convert  Experimental foreign pattern converter
    pcre2_pattern_info  Extract information about a pattern
    pcre2_serialize_decode  Decode serialized compiled patterns
    pcre2_serialize_encode  Serialize compiled patterns for save/restore
    pcre2_serialize_free  Free serialized compiled patterns
    pcre2_serialize_get_number_of_codes  Get number of serialized compiled patterns
    pcre2_set_bsr  Set \R convention
    pcre2_set_callout  Set up a callout function
    pcre2_set_character_tables  Set character tables
    pcre2_set_compile_extra_options  Set compile time extra options
    pcre2_set_compile_recursion_guard  Set up a compile recursion guard function
    pcre2_set_depth_limit  Set the match backtracking depth limit
    pcre2_set_glob_escape  Set glob escape character
    pcre2_set_glob_separator  Set glob separator character
    pcre2_set_heap_limit  Set the match backtracking heap limit
    pcre2_set_match_limit  Set the match limit
    pcre2_set_max_pattern_compiled_length  Set the maximum length of a compiled pattern
    pcre2_set_max_pattern_length  Set the maximum length of a pattern
    pcre2_set_max_varlookbehind  Set the maximum match length for a variable-length lookbehind
    pcre2_set_newline  Set the newline convention
    pcre2_set_offset_limit  Set the offset limit
    pcre2_set_parens_nest_limit  Set the parentheses nesting limit
    pcre2_set_recursion_limit  Obsolete: use pcre2_set_depth_limit
    pcre2_set_recursion_memory_management  Obsolete function that (from 10.30 onwards) does nothing
    pcre2_substitute  Match a compiled pattern to a subject string and do - substitutions
    pcre2_substring_copy_byname  Extract named substring into given buffer
    pcre2_substring_copy_bynumber  Extract numbered substring into given buffer
    pcre2_substring_free  Free extracted substring
    pcre2_substring_get_byname  Extract named substring into new memory
    pcre2_substring_get_bynumber  Extract numbered substring into new memory
    pcre2_substring_length_byname  Find length of named substring
    pcre2_substring_length_bynumber  Find length of numbered substring
    pcre2_substring_list_free  Free list of extracted substrings
    pcre2_substring_list_get  Extract all substrings into new memory
    pcre2_substring_nametable_scan  Find table entries for given string name
    pcre2_substring_number_from_name  Convert captured string name to number
    - - - diff --git a/doc/pcre2-config.1 b/doc/pcre2-config.1 index 7fa0a09..7bbc562 100644 --- a/doc/pcre2-config.1 +++ b/doc/pcre2-config.1 @@ -1,4 +1,4 @@ -.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.00" +.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.45-RC1" .SH NAME pcre2-config - program to return PCRE2 configuration .SH SYNOPSIS diff --git a/doc/pcre2-config.txt b/doc/pcre2-config.txt index dc8cf8f..b44a586 100644 --- a/doc/pcre2-config.txt +++ b/doc/pcre2-config.txt @@ -1,4 +1,3 @@ - PCRE2-CONFIG(1) General Commands Manual PCRE2-CONFIG(1) @@ -82,4 +81,4 @@ REVISION Last updated: 28 September 2014 -PCRE2 10.00 28 September 2014 PCRE2-CONFIG(1) +PCRE2 10.45-RC1 28 September 2014 PCRE2-CONFIG(1) diff --git a/doc/pcre2.3 b/doc/pcre2.3 index fa91450..e1770ec 100644 --- a/doc/pcre2.3 +++ b/doc/pcre2.3 @@ -1,4 +1,4 @@ -.TH PCRE2 3 "27 August 2021" "PCRE2 10.38" +.TH PCRE2 3 "18 December 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH INTRODUCTION @@ -186,23 +186,26 @@ In the "man" and HTML formats, there is also a short page for each C library function, listing its arguments and results. . . -.SH AUTHOR +.SH AUTHORS .rs .sp -.nf -Philip Hazel -Retired from University Computing Service -Cambridge, England. -.fi +The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg. +.P +PCRE2 was written by Philip Hazel, of the University Computing Service, +Cambridge, England. Many others have also contributed. .P -Putting an actual email address here is a spam magnet. If you want to email me, -use my two names separated by a dot at gmail.com. +To contact the maintainers, please use the GitHub issues tracker or PCRE2 +mailing list, as described at the project page: +.\" HTML +.\" +https://github.com/PCRE2Project/pcre2 +.\" . . .SH REVISION .rs .sp .nf -Last updated: 27 August 2021 +Last updated: 18 December 2024 Copyright (c) 1997-2021 University of Cambridge. 
.fi diff --git a/doc/pcre2.txt b/doc/pcre2.txt index 85eead6..b929fce 100644 --- a/doc/pcre2.txt +++ b/doc/pcre2.txt @@ -8,7 +8,6 @@ pcre2test commands. ----------------------------------------------------------------------------- - PCRE2(3) Library Functions Manual PCRE2(3) @@ -171,27 +170,29 @@ USER DOCUMENTATION library function, listing its arguments and results. -AUTHOR +AUTHORS - Philip Hazel - Retired from University Computing Service - Cambridge, England. + The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Her- + czeg. + + PCRE2 was written by Philip Hazel, of the University Computing Service, + Cambridge, England. Many others have also contributed. - Putting an actual email address here is a spam magnet. If you want to - email me, use my two names separated by a dot at gmail.com. + To contact the maintainers, please use the GitHub issues tracker or + PCRE2 mailing list, as described at the project page: + https://github.com/PCRE2Project/pcre2 REVISION - Last updated: 27 August 2021 + Last updated: 18 December 2024 Copyright (c) 1997-2021 University of Cambridge. 
-PCRE2 10.38 27 August 2021 PCRE2(3) +PCRE2 10.45-RC1 18 December 2024 PCRE2(3) ------------------------------------------------------------------------------ - PCRE2API(3) Library Functions Manual PCRE2API(3) @@ -298,6 +299,9 @@ PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, int (*guard_function)(uint32_t, void *), void *user_data); + int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); + PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS @@ -317,6 +321,12 @@ PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS int (*callout_function)(pcre2_substitute_callout_block *, void *), void *callout_data); + int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); + int pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE value); @@ -858,6 +868,7 @@ PCRE2 CONTEXTS The compile time nested parentheses limit The maximum length of the pattern string The extra options bits (none set by default) + Which performance optimizations the compiler should apply A compile context is also required if you are using custom memory man- agement. If none of these apply, just pass NULL as the context argu- @@ -980,6 +991,110 @@ PCRE2 CONTEXTS ment of pcre2_set_compile_recursion_guard(). The callout function should return zero if all is well, or non-zero to force an error. + int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); + + PCRE2 can apply various performance optimizations during compilation, + in order to make matching faster. For example, the compiler might con- + vert some regex constructs into an equivalent construct which + pcre2_match() can execute faster. By default, all available optimiza- + tions are enabled. However, in rare cases, one might wish to disable + specific optimizations. 
For example, if it is known that some optimiza- + tions cannot benefit a certain regex, it might be desirable to disable + them, in order to speed up compilation. + + The permitted values of directive are as follows: + + PCRE2_OPTIMIZATION_FULL + + Enable all optional performance optimizations. This is the default + value. + + PCRE2_OPTIMIZATION_NONE + + Disable all optional performance optimizations. + + PCRE2_AUTO_POSSESS + PCRE2_AUTO_POSSESS_OFF + + Enable/disable "auto-possessification" of variable quantifiers such as + * and +. This optimization, for example, turns a+b into a++b in order + to avoid backtracks into a+ that can never be successful. However, if + callouts are in use, auto-possessification means that some callouts are + never taken. You can disable this optimization if you want the matching + functions to do a full, unoptimized search and run all the callouts. + + PCRE2_DOTSTAR_ANCHOR + PCRE2_DOTSTAR_ANCHOR_OFF + + Enable/disable an optimization that is applied when .* is the first + significant item in a top-level branch of a pattern, and all the other + branches also start with .* or with \A or \G or ^. Such a pattern is + automatically anchored if PCRE2_DOTALL is set for all the .* items and + PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that + any match must start either at the start of the subject or following a + newline is remembered. Like other optimizations, this can cause call- + outs to be skipped. + + Dotstar anchor optimization is automatically disabled for .* if it is + inside an atomic group or a capture group that is the subject of a + backreference, or if the pattern contains (*PRUNE) or (*SKIP). + + PCRE2_START_OPTIMIZE + PCRE2_START_OPTIMIZE_OFF + + Enable/disable optimizations which cause matching functions to scan the + subject string for specific code unit values before attempting a match. 
+ For example, if it is known that an unanchored match must start with a + specific value, the matching code searches the subject for that value, + and fails immediately if it cannot find it, without actually running + the main matching function. This means that a special item such as + (*COMMIT) at the start of a pattern is not considered until after a + suitable starting point for the match has been found. Also, when call- + outs or (*MARK) items are in use, these "start-up" optimizations can + cause them to be skipped if the pattern is never actually used. The + start-up optimizations are in effect a pre-scan of the subject that + takes place before the pattern is run. + + Disabling start-up optimizations ensures that in cases where the result + is "no match", the callouts do occur, and that items such as (*COMMIT) + and (*MARK) are considered at every possible starting position in the + subject string. + + Disabling start-up optimizations may change the outcome of a matching + operation. Consider the pattern + + (*COMMIT)ABC + + When this is compiled, PCRE2 records the fact that a match must start + with the character "A". Suppose the subject string is "DEFABC". The + start-up optimization scans along the subject, finds "A" and runs the + first match attempt from there. The (*COMMIT) item means that the pat- + tern must match the current starting position, which in this case, it + does. However, if the same match is run without start-up optimizations, + the initial scan along the subject string does not happen. The first + match attempt is run starting from "D" and when this fails, (*COMMIT) + prevents any further matches being tried, so the overall result is "no + match". + + Another start-up optimization makes use of a minimum length for a + matching subject, which is recorded when possible. Consider the pattern + + (*MARK:1)B(*MARK:2)(X|Y) + + The minimum length for a match is two characters. 
If the subject is + "XXBB", the "starting character" optimization skips "XX", then tries to + match "BB", which is long enough. In the process, (*MARK:2) is encoun- + tered and remembered. When the match attempt fails, the next "B" is + found, but there is only one character left, so there are no more at- + tempts, and "no match" is returned with the "last mark seen" set to + "2". Without start-up optimizations, however, matches are tried at + every possible starting position, including at the end of the subject, + where (*MARK:1) is encountered, but there is no "B", so the "last mark + seen" that is returned is "1". In this case, the optimizations do not + affect the overall match result, which is still "no match", but they do + affect the auxiliary information that is returned. + The match context A match context is required if you want to: @@ -1025,6 +1140,16 @@ PCRE2 CONTEXTS tion made by pcre2_substitute(). Details are given in the section enti- tled "Creating a new string with substitutions" below. + int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); + + This sets up a callout function for PCRE2 to call when performing case + transformations inside pcre2_substitute(). Details are given in the + section entitled "Creating a new string with substitutions" below. + int pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE value); @@ -1224,8 +1349,11 @@ CHECKING BUILD-TIME OPTIONS The output is a uint32_t integer that is set to one if support for just-in-time compiling is included in the library; otherwise it is set to zero. Note that having the support in the library does not guarantee - that JIT will be used for any given match. See the pcre2jit documenta- - tion for more details. 
+ that JIT will be used for any given match, and neither does it guaran- + tee that JIT will actually be able to function, because it may not be + able to allocate executable memory in some environments. There is a + special call to pcre2_jit_compile() that can be used to check this. See + the pcre2jit documentation for more details. PCRE2_CONFIG_JITTARGET @@ -1413,10 +1541,10 @@ COMPILING A PATTERN spectively, when pcre2_compile() returns NULL because a compilation er- ror has occurred. - There are nearly 100 positive error codes that pcre2_compile() may re- - turn if it finds an error in the pattern. There are also some negative - error codes that are used for invalid UTF strings when validity check- - ing is in force. These are the same as given by pcre2_match() and + There are over 100 positive error codes that pcre2_compile() may return + if it finds an error in the pattern. There are also some negative error + codes that are used for invalid UTF strings when validity checking is + in force. These are the same as given by pcre2_match() and pcre2_dfa_match(), and are described in the pcre2unicode documentation. There is no separate documentation for the positive error codes, be- cause the textual error messages that are obtained by calling the @@ -1511,39 +1639,56 @@ COMPILING A PATTERN Perl. If you want a multiline circumflex also to match after a termi- nating newline, you must set PCRE2_ALT_CIRCUMFLEX. + PCRE2_ALT_EXTENDED_CLASS + + Alters the parsing of character classes to follow the extended syntax + described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no + impact on the behaviour of the Perl-specific "(?[...])" syntax for ex- + tended classes, but instead enables the alternative syntax of extended + class behaviour inside ordinary "[...]" character classes. See the + pcre2pattern documentation for details of the character classes sup- + ported. 
+ PCRE2_ALT_VERBNAMES - By default, for compatibility with Perl, the name in any verb sequence - such as (*MARK:NAME) is any sequence of characters that does not in- - clude a closing parenthesis. The name is not processed in any way, and - it is not possible to include a closing parenthesis in the name. How- - ever, if the PCRE2_ALT_VERBNAMES option is set, normal backslash pro- - cessing is applied to verb names and only an unescaped closing paren- - thesis terminates the name. A closing parenthesis can be included in a - name either as \) or between \Q and \E. If the PCRE2_EXTENDED or - PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped - whitespace in verb names is skipped and #-comments are recognized, ex- + By default, for compatibility with Perl, the name in any verb sequence + such as (*MARK:NAME) is any sequence of characters that does not in- + clude a closing parenthesis. The name is not processed in any way, and + it is not possible to include a closing parenthesis in the name. How- + ever, if the PCRE2_ALT_VERBNAMES option is set, normal backslash pro- + cessing is applied to verb names and only an unescaped closing paren- + thesis terminates the name. A closing parenthesis can be included in a + name either as \) or between \Q and \E. If the PCRE2_EXTENDED or + PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped + whitespace in verb names is skipped and #-comments are recognized, ex- actly as in the rest of the pattern. PCRE2_AUTO_CALLOUT - If this bit is set, pcre2_compile() automatically inserts callout - items, all with number 255, before each pattern item, except immedi- - ately before or after an explicit callout in the pattern. For discus- + If this bit is set, pcre2_compile() automatically inserts callout + items, all with number 255, before each pattern item, except immedi- + ately before or after an explicit callout in the pattern. 
For discus- sion of the callout facility, see the pcre2callout documentation. PCRE2_CASELESS - If this bit is set, letters in the pattern match both upper and lower - case letters in the subject. It is equivalent to Perl's /i option, and - it can be changed within a pattern by a (?i) option setting. If either - PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all - characters with more than one other case, and for all characters whose - code points are greater than U+007F. Note that there are two ASCII - characters, K and S, that, in addition to their lower case ASCII equiv- - alents, are case-equivalent with U+212A (Kelvin sign) and U+017F (long - S) respectively. If you do not want this case equivalence, you can sup- - press it by setting PCRE2_EXTRA_CASELESS_RESTRICT. + If this bit is set, letters in the pattern match both upper and lower + case letters in the subject. It is equivalent to Perl's /i option, and + it can be changed within a pattern by a (?i) option setting. If either + PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all + characters with more than one other case, and for all characters whose + code points are greater than U+007F. + + Note that there are two ASCII characters, K and S, that, in addition to + their lower case ASCII equivalents, are case-equivalent with U+212A + (Kelvin sign) and U+017F (long S) respectively. If you do not want this + case equivalence, you can suppress it by setting PCRE2_EXTRA_CASE- + LESS_RESTRICT. + + One language family, Turkish and Azeri, has its own case-insensitivity + rules, which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. + This alters the behaviour of the 'i', 'I', U+0130 (capital I with dot + above), and U+0131 (small dotless i) characters. For lower valued characters with only one other case, a lookup table is used for speed. 
When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup @@ -1551,201 +1696,206 @@ COMPILING A PATTERN (available only in 16-bit or 32-bit mode) are treated as not having an- other case. + From release 10.45 PCRE2_CASELESS also affects what some of the letter- + related Unicode property escapes (\p and \P) match. The properties Lu + (upper case letter), Ll (lower case letter), and Lt (title case letter) + are all treated as LC (cased letter) when PCRE2_CASELESS is set. + PCRE2_DOLLAR_ENDONLY - If this bit is set, a dollar metacharacter in the pattern matches only - at the end of the subject string. Without this option, a dollar also - matches immediately before a newline at the end of the string (but not - before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored - if PCRE2_MULTILINE is set. There is no equivalent to this option in + If this bit is set, a dollar metacharacter in the pattern matches only + at the end of the subject string. Without this option, a dollar also + matches immediately before a newline at the end of the string (but not + before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored + if PCRE2_MULTILINE is set. There is no equivalent to this option in Perl, and no way to set it within a pattern. PCRE2_DOTALL - If this bit is set, a dot metacharacter in the pattern matches any - character, including one that indicates a newline. However, it only + If this bit is set, a dot metacharacter in the pattern matches any + character, including one that indicates a newline. However, it only ever matches one character, even if newlines are coded as CRLF. Without this option, a dot does not match when the current position in the sub- - ject is at a newline. This option is equivalent to Perl's /s option, + ject is at a newline. This option is equivalent to Perl's /s option, and it can be changed within a pattern by a (?s) option setting. 
A neg- - ative class such as [^a] always matches newline characters, and the \N - escape sequence always matches a non-newline character, independent of + ative class such as [^a] always matches newline characters, and the \N + escape sequence always matches a non-newline character, independent of the setting of PCRE2_DOTALL. PCRE2_DUPNAMES - If this bit is set, names used to identify capture groups need not be - unique. This can be helpful for certain types of pattern when it is - known that only one instance of the named group can ever be matched. - There are more details of named capture groups below; see also the + If this bit is set, names used to identify capture groups need not be + unique. This can be helpful for certain types of pattern when it is + known that only one instance of the named group can ever be matched. + There are more details of named capture groups below; see also the pcre2pattern documentation. PCRE2_ENDANCHORED - If this bit is set, the end of any pattern match must be right at the + If this bit is set, the end of any pattern match must be right at the end of the string being searched (the "subject string"). If the pattern match succeeds by reaching (*ACCEPT), but does not reach the end of the - subject, the match fails at the current starting point. For unanchored - patterns, a new match is then tried at the next starting point. How- + subject, the match fails at the current starting point. For unanchored + patterns, a new match is then tried at the next starting point. How- ever, if the match succeeds by reaching the end of the pattern, but not - the end of the subject, backtracking occurs and an alternative match + the end of the subject, backtracking occurs and an alternative match may be found. Consider these two patterns: .(*ACCEPT)|.. .|.. - If matched against "abc" with PCRE2_ENDANCHORED set, the first matches - "c" whereas the second matches "bc". 
The effect of PCRE2_ENDANCHORED - can also be achieved by appropriate constructs in the pattern itself, + If matched against "abc" with PCRE2_ENDANCHORED set, the first matches + "c" whereas the second matches "bc". The effect of PCRE2_ENDANCHORED + can also be achieved by appropriate constructs in the pattern itself, which is the only way to do it in Perl. For DFA matching with pcre2_dfa_match(), PCRE2_ENDANCHORED applies only - to the first (that is, the longest) matched string. Other parallel - matches, which are necessarily substrings of the first one, must obvi- + to the first (that is, the longest) matched string. Other parallel + matches, which are necessarily substrings of the first one, must obvi- ously end before the end of the subject. PCRE2_EXTENDED - If this bit is set, most white space characters in the pattern are to- - tally ignored except when escaped, inside a character class, or inside - a \Q...\E sequence. However, white space is not allowed within se- - quences such as (?> that introduce various parenthesized groups, nor - within numerical quantifiers such as {1,3}. Ignorable white space is - permitted between an item and a following quantifier and between a - quantifier and a following + that indicates possessiveness. PCRE2_EX- - TENDED is equivalent to Perl's /x option, and it can be changed within + If this bit is set, most white space characters in the pattern are to- + tally ignored except when escaped, inside a character class, or inside + a \Q...\E sequence. However, white space is not allowed within se- + quences such as (?> that introduce various parenthesized groups, nor + within numerical quantifiers such as {1,3}. Ignorable white space is + permitted between an item and a following quantifier and between a + quantifier and a following + that indicates possessiveness. PCRE2_EX- + TENDED is equivalent to Perl's /x option, and it can be changed within a pattern by a (?x) option setting. 
- When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recog- - nizes as white space only those characters with code points less than + When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recog- + nizes as white space only those characters with code points less than 256 that are flagged as white space in its low-character table. The ta- ble is normally created by pcre2_maketables(), which uses the isspace() - function to identify space characters. In most ASCII environments, the - relevant characters are those with code points 0x0009 (tab), 0x000A - (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage + function to identify space characters. In most ASCII environments, the + relevant characters are those with code points 0x0009 (tab), 0x000A + (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage return), and 0x0020 (space). When PCRE2 is compiled with Unicode support, in addition to these char- - acters, five more Unicode "Pattern White Space" characters are recog- + acters, five more Unicode "Pattern White Space" characters are recog- nized by PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to- - right mark), U+200F (right-to-left mark), U+2028 (line separator), and - U+2029 (paragraph separator). This set of characters is the same as - recognized by Perl's /x option. Note that the horizontal and vertical - space characters that are matched by the \h and \v escapes in patterns + right mark), U+200F (right-to-left mark), U+2028 (line separator), and + U+2029 (paragraph separator). This set of characters is the same as + recognized by Perl's /x option. Note that the horizontal and vertical + space characters that are matched by the \h and \v escapes in patterns are a much bigger set. 
- As well as ignoring most white space, PCRE2_EXTENDED also causes char- - acters between an unescaped # outside a character class and the next - newline, inclusive, to be ignored, which makes it possible to include + As well as ignoring most white space, PCRE2_EXTENDED also causes char- + acters between an unescaped # outside a character class and the next + newline, inclusive, to be ignored, which makes it possible to include comments inside complicated patterns. Note that the end of this type of - comment is a literal newline sequence in the pattern; escape sequences + comment is a literal newline sequence in the pattern; escape sequences that happen to represent a newline do not count. Which characters are interpreted as newlines can be specified by a set- - ting in the compile context that is passed to pcre2_compile() or by a - special sequence at the start of the pattern, as described in the sec- - tion entitled "Newline conventions" in the pcre2pattern documentation. + ting in the compile context that is passed to pcre2_compile() or by a + special sequence at the start of the pattern, as described in the sec- + tion entitled "Newline conventions" in the pcre2pattern documentation. A default is defined when PCRE2 is built. PCRE2_EXTENDED_MORE - This option has the effect of PCRE2_EXTENDED, but, in addition, un- - escaped space and horizontal tab characters are ignored inside a char- - acter class. Note: only these two characters are ignored, not the full - set of pattern white space characters that are ignored outside a char- - acter class. PCRE2_EXTENDED_MORE is equivalent to Perl's /xx option, + This option has the effect of PCRE2_EXTENDED, but, in addition, un- + escaped space and horizontal tab characters are ignored inside a char- + acter class. Note: only these two characters are ignored, not the full + set of pattern white space characters that are ignored outside a char- + acter class. 
PCRE2_EXTENDED_MORE is equivalent to Perl's /xx option, and it can be changed within a pattern by a (?xx) option setting. PCRE2_FIRSTLINE If this option is set, the start of an unanchored pattern match must be - before or at the first newline in the subject string following the - start of matching, though the matched text may continue over the new- + before or at the first newline in the subject string following the + start of matching, though the matched text may continue over the new- line. If startoffset is non-zero, the limiting newline is not necessar- - ily the first newline in the subject. For example, if the subject + ily the first newline in the subject. For example, if the subject string is "abc\nxyz" (where \n represents a single-character newline) a - pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is - greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more - general limiting facility. If PCRE2_FIRSTLINE is set with an offset - limit, a match must occur in the first line and also within the offset + pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is + greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more + general limiting facility. If PCRE2_FIRSTLINE is set with an offset + limit, a match must occur in the first line and also within the offset limit. In other words, whichever limit comes first is used. This option has no effect for anchored patterns. PCRE2_LITERAL If this option is set, all meta-characters in the pattern are disabled, - and it is treated as a literal string. Matching literal strings with a + and it is treated as a literal string. Matching literal strings with a regular expression engine is not the most efficient way of doing it. If - you are doing a lot of literal matching and are worried about effi- + you are doing a lot of literal matching and are worried about effi- ciency, you should consider using other approaches. 
The only other main options that are allowed with PCRE2_LITERAL are: PCRE2_ANCHORED, PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, PCRE2_CASELESS, PCRE2_FIRSTLINE, PCRE2_MATCH_INVALID_UTF, PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, - PCRE2_UTF, and PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EX- + PCRE2_UTF, and PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EX- TRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an error. PCRE2_MATCH_INVALID_UTF - This option forces PCRE2_UTF (see below) and also enables support for - matching by pcre2_match() in subject strings that contain invalid UTF - sequences. Note, however, that the 16-bit and 32-bit PCRE2 libraries - process strings as sequences of uint16_t or uint32_t code points. They + This option forces PCRE2_UTF (see below) and also enables support for + matching by pcre2_match() in subject strings that contain invalid UTF + sequences. Note, however, that the 16-bit and 32-bit PCRE2 libraries + process strings as sequences of uint16_t or uint32_t code points. They cannot find valid UTF sequences within an arbitrary string of bytes un- - less such sequences are suitably aligned. This facility is not sup- - ported for DFA matching. For details, see the pcre2unicode documenta- + less such sequences are suitably aligned. This facility is not sup- + ported for DFA matching. For details, see the pcre2unicode documenta- tion. PCRE2_MATCH_UNSET_BACKREF - If this option is set, a backreference to an unset capture group - matches an empty string (by default this causes the current matching + If this option is set, a backreference to an unset capture group + matches an empty string (by default this causes the current matching alternative to fail). A pattern such as (\1)(a) succeeds when this op- - tion is set (assuming it can find an "a" in the subject), whereas it - fails by default, for Perl compatibility. 
Setting this option makes + tion is set (assuming it can find an "a" in the subject), whereas it + fails by default, for Perl compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka JavaScript). PCRE2_MULTILINE - By default, for the purposes of matching "start of line" and "end of - line", PCRE2 treats the subject string as consisting of a single line - of characters, even if it actually contains newlines. The "start of - line" metacharacter (^) matches only at the start of the string, and - the "end of line" metacharacter ($) matches only at the end of the - string, or before a terminating newline (except when PCRE2_DOLLAR_EN- + By default, for the purposes of matching "start of line" and "end of + line", PCRE2 treats the subject string as consisting of a single line + of characters, even if it actually contains newlines. The "start of + line" metacharacter (^) matches only at the start of the string, and + the "end of line" metacharacter ($) matches only at the end of the + string, or before a terminating newline (except when PCRE2_DOLLAR_EN- DONLY is set). Note, however, that unless PCRE2_DOTALL is set, the "any - character" metacharacter (.) does not match at a newline. This behav- + character" metacharacter (.) does not match at a newline. This behav- iour (for ^, $, and dot) is the same as Perl. - When PCRE2_MULTILINE it is set, the "start of line" and "end of line" - constructs match immediately following or immediately before internal - newlines in the subject string, respectively, as well as at the very - start and end. This is equivalent to Perl's /m option, and it can be + When PCRE2_MULTILINE is set, the "start of line" and "end of line" + constructs match immediately following or immediately before internal + newlines in the subject string, respectively, as well as at the very + start and end. This is equivalent to Perl's /m option, and it can be changed within a pattern by a (?m) option setting.
Note that the "start of line" metacharacter does not match after a newline at the end of the - subject, for compatibility with Perl. However, you can change this by - setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a - subject string, or no occurrences of ^ or $ in a pattern, setting + subject, for compatibility with Perl. However, you can change this by + setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a + subject string, or no occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect. PCRE2_NEVER_BACKSLASH_C - This option locks out the use of \C in the pattern that is being com- - piled. This escape can cause unpredictable behaviour in UTF-8 or - UTF-16 modes, because it may leave the current matching point in the + This option locks out the use of \C in the pattern that is being com- + piled. This escape can cause unpredictable behaviour in UTF-8 or + UTF-16 modes, because it may leave the current matching point in the middle of a multi-code-unit character. This option may be useful in ap- plications that process patterns from external sources. Note that there is also a build-time option that permanently locks out the use of \C. PCRE2_NEVER_UCP - This option locks out the use of Unicode properties for handling \B, + This option locks out the use of Unicode properties for handling \B, \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes, as - described for the PCRE2_UCP option below. In particular, it prevents - the creator of the pattern from enabling this facility by starting the - pattern with (*UCP). This option may be useful in applications that - process patterns from external sources. The option combination PCRE_UCP - and PCRE_NEVER_UCP causes an error. + described for the PCRE2_UCP option below. In particular, it prevents + the creator of the pattern from enabling this facility by starting the + pattern with (*UCP). 
This option may be useful in applications that + process patterns from external sources. The option combination + PCRE2_UCP and PCRE2_NEVER_UCP causes an error. PCRE2_NEVER_UTF @@ -1769,86 +1919,56 @@ COMPILING A PATTERN PCRE2_NO_AUTO_POSSESS - If this option is set, it disables "auto-possessification", which is an - optimization that, for example, turns a+b into a++b in order to avoid - backtracks into a+ that can never be successful. However, if callouts - are in use, auto-possessification means that some callouts are never - taken. You can set this option if you want the matching functions to do - a full unoptimized search and run all the callouts, but it is mainly - provided for testing purposes. + If this (deprecated) option is set, it disables "auto-possessifica- + tion", which is an optimization that, for example, turns a+b into a++b + in order to avoid backtracks into a+ that can never be successful. How- + ever, if callouts are in use, auto-possessification means that some + callouts are never taken. You can set this option if you want the + matching functions to do a full unoptimized search and run all the + callouts, but it is mainly provided for testing purposes. + + If a compile context is available, it is recommended to use + pcre2_set_optimize() with the directive PCRE2_AUTO_POSSESS_OFF rather + than the compile option PCRE2_NO_AUTO_POSSESS. Note that + PCRE2_NO_AUTO_POSSESS takes precedence over the pcre2_set_optimize() + optimization directives PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF. PCRE2_NO_DOTSTAR_ANCHOR - If this option is set, it disables an optimization that is applied when - .* is the first significant item in a top-level branch of a pattern, - and all the other branches also start with .* or with \A or \G or ^. - The optimization is automatically disabled for .* if it is inside an - atomic group or a capture group that is the subject of a backreference, - or if the pattern contains (*PRUNE) or (*SKIP). 
When the optimization - is not disabled, such a pattern is automatically anchored if + If this (deprecated) option is set, it disables an optimization that is + applied when .* is the first significant item in a top-level branch of + a pattern, and all the other branches also start with .* or with \A or + \G or ^. The optimization is automatically disabled for .* if it is in- + side an atomic group or a capture group that is the subject of a back- + reference, or if the pattern contains (*PRUNE) or (*SKIP). When the op- + timization is not disabled, such a pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set - for any ^ items. Otherwise, the fact that any match must start either - at the start of the subject or following a newline is remembered. Like - other optimizations, this can cause callouts to be skipped. + for any ^ items. Otherwise, the fact that any match must start either + at the start of the subject or following a newline is remembered. Like + other optimizations, this can cause callouts to be skipped. (If a com- + pile context is available, it is recommended to use pcre2_set_opti- + mize() with the directive PCRE2_DOTSTAR_ANCHOR_OFF instead.) PCRE2_NO_START_OPTIMIZE This is an option whose main effect is at matching time. It does not change what pcre2_compile() generates, but it does affect the output of - the JIT compiler. + the JIT compiler. Setting this option is equivalent to calling + pcre2_set_optimize() with the directive parameter set to + PCRE2_START_OPTIMIZE_OFF. There are a number of optimizations that may occur at the start of a match, in order to speed up the process. For example, if it is known that an unanchored match must start with a specific code unit value, the matching code searches the subject for that value, and fails imme- diately if it cannot find it, without actually running the main match- - ing function. 
This means that a special item such as (*COMMIT) at the - start of a pattern is not considered until after a suitable starting - point for the match has been found. Also, when callouts or (*MARK) - items are in use, these "start-up" optimizations can cause them to be - skipped if the pattern is never actually used. The start-up optimiza- - tions are in effect a pre-scan of the subject that takes place before - the pattern is run. - - The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations, - possibly causing performance to suffer, but ensuring that in cases - where the result is "no match", the callouts do occur, and that items - such as (*COMMIT) and (*MARK) are considered at every possible starting - position in the subject string. - - Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching - operation. Consider the pattern + ing function. The start-up optimizations are in effect a pre-scan of + the subject that takes place before the pattern is run. - (*COMMIT)ABC - - When this is compiled, PCRE2 records the fact that a match must start - with the character "A". Suppose the subject string is "DEFABC". The - start-up optimization scans along the subject, finds "A" and runs the - first match attempt from there. The (*COMMIT) item means that the pat- - tern must match the current starting position, which in this case, it - does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE - set, the initial scan along the subject string does not happen. The - first match attempt is run starting from "D" and when this fails, - (*COMMIT) prevents any further matches being tried, so the overall re- - sult is "no match". - - As another start-up optimization makes use of a minimum length for a - matching subject, which is recorded when possible. Consider the pattern - - (*MARK:1)B(*MARK:2)(X|Y) - - The minimum length for a match is two characters. 
If the subject is - "XXBB", the "starting character" optimization skips "XX", then tries to - match "BB", which is long enough. In the process, (*MARK:2) is encoun- - tered and remembered. When the match attempt fails, the next "B" is - found, but there is only one character left, so there are no more at- - tempts, and "no match" is returned with the "last mark seen" set to - "2". If NO_START_OPTIMIZE is set, however, matches are tried at every - possible starting position, including at the end of the subject, where - (*MARK:1) is encountered, but there is no "B", so the "last mark seen" - that is returned is "1". In this case, the optimizations do not affect - the overall match result, which is still "no match", but they do affect - the auxiliary information that is returned. + Disabling the start-up optimizations may cause performance to suffer. + However, this may be desirable for patterns which contain callouts or + items such as (*COMMIT) and (*MARK). See the above description of + PCRE2_START_OPTIMIZE_OFF for further details. PCRE2_NO_UTF_CHECK @@ -1892,41 +2012,46 @@ COMPILING A PATTERN ties for upper/lower casing operations, even when PCRE2_UTF is not set. This makes it possible to process strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has been compiled with Unicode - support (which is the default). The PCRE2_EXTRA_CASELESS_RESTRICT op- - tion (see below) restricts caseless matching such that ASCII characters - match only ASCII characters and non-ASCII characters match only non- - ASCII characters. + support (which is the default). + + The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless + matching such that ASCII characters match only ASCII characters and + non-ASCII characters match only non-ASCII characters. The PCRE2_EX- + TRA_TURKISH_CASING option (see above) alters the matching of the 'i' + characters to follow their behaviour in Turkish and Azeri languages. 
+ For further details on PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EX- + TRA_TURKISH_CASING, see the pcre2unicode page. PCRE2_UNGREEDY - This option inverts the "greediness" of the quantifiers so that they - are not greedy by default, but become greedy if followed by "?". It is - not compatible with Perl. It can also be set by a (?U) option setting + This option inverts the "greediness" of the quantifiers so that they + are not greedy by default, but become greedy if followed by "?". It is + not compatible with Perl. It can also be set by a (?U) option setting within the pattern. PCRE2_USE_OFFSET_LIMIT This option must be set for pcre2_compile() if pcre2_set_offset_limit() - is going to be used to set a non-default offset limit in a match con- - text for matches that use this pattern. An error is generated if an - offset limit is set without this option. For more details, see the de- - scription of pcre2_set_offset_limit() in the section that describes + is going to be used to set a non-default offset limit in a match con- + text for matches that use this pattern. An error is generated if an + offset limit is set without this option. For more details, see the de- + scription of pcre2_set_offset_limit() in the section that describes match contexts. See also the PCRE2_FIRSTLINE option above. PCRE2_UTF - This option causes PCRE2 to regard both the pattern and the subject - strings that are subsequently processed as strings of UTF characters - instead of single-code-unit strings. It is available when PCRE2 is - built to include Unicode support (which is the default). If Unicode + This option causes PCRE2 to regard both the pattern and the subject + strings that are subsequently processed as strings of UTF characters + instead of single-code-unit strings. It is available when PCRE2 is + built to include Unicode support (which is the default). If Unicode support is not available, the use of this option provokes an error. 
De- - tails of how PCRE2_UTF changes the behaviour of PCRE2 are given in the + tails of how PCRE2_UTF changes the behaviour of PCRE2 are given in the pcre2unicode page. In particular, note that it changes the way PCRE2_CASELESS works. Extra compile options - The option bits that can be set in a compile context by calling the + The option bits that can be set in a compile context by calling the pcre2_set_compile_extra_options() function are as follows: PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK @@ -1938,102 +2063,102 @@ COMPILING A PATTERN PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES - This option applies when compiling a pattern in UTF-8 or UTF-32 mode. - It is forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode + This option applies when compiling a pattern in UTF-8 or UTF-32 mode. + It is forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate" code points in the range 0xd800 to 0xdfff are used in pairs - in UTF-16 to encode code points with values in the range 0x10000 to - 0x10ffff. The surrogates cannot therefore be represented in UTF-16. + in UTF-16 to encode code points with values in the range 0x10000 to + 0x10ffff. The surrogates cannot therefore be represented in UTF-16. They can be represented in UTF-8 and UTF-32, but are defined as invalid - code points, and cause errors if encountered in a UTF-8 or UTF-32 + code points, and cause errors if encountered in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2. - These values also cause errors if encountered in escape sequences such + These values also cause errors if encountered in escape sequences such as \x{d912} within a pattern. However, it seems that some applications, when using PCRE2 to check for unwanted characters in UTF-8 strings, ex- - plicitly test for the surrogates using escape sequences. The - PCRE2_NO_UTF_CHECK option does not disable the error that occurs, be- + plicitly test for the surrogates using escape sequences. 
The + PCRE2_NO_UTF_CHECK option does not disable the error that occurs, be- cause it applies only to the testing of input strings for UTF validity. - If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surro- - gate code point values in UTF-8 and UTF-32 patterns no longer provoke - errors and are incorporated in the compiled pattern. However, they can - only match subject characters if the matching function is called with + If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surro- + gate code point values in UTF-8 and UTF-32 patterns no longer provoke + errors and are incorporated in the compiled pattern. However, they can + only match subject characters if the matching function is called with PCRE2_NO_UTF_CHECK set. PCRE2_EXTRA_ALT_BSUX - The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and - \x in the way that ECMAscript (aka JavaScript) does. Additional func- + The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and + \x in the way that ECMAscript (aka JavaScript) does. Additional func- tionality was defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has - the effect of PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} + the effect of PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal character code, where hhh.. is any number of hexadeci- mal digits. PCRE2_EXTRA_ASCII_BSD - This option forces \d to match only ASCII digits, even when PCRE2_UCP - is set. It can be changed within a pattern by means of the (?aD) op- + This option forces \d to match only ASCII digits, even when PCRE2_UCP + is set. It can be changed within a pattern by means of the (?aD) op- tion setting. PCRE2_EXTRA_ASCII_BSS - This option forces \s to match only ASCII space characters, even when - PCRE2_UCP is set. It can be changed within a pattern by means of the + This option forces \s to match only ASCII space characters, even when + PCRE2_UCP is set. 
It can be changed within a pattern by means of the (?aS) option setting. PCRE2_EXTRA_ASCII_BSW - This option forces \w to match only ASCII word characters, even when - PCRE2_UCP is set. It can be changed within a pattern by means of the + This option forces \w to match only ASCII word characters, even when + PCRE2_UCP is set. It can be changed within a pattern by means of the (?aW) option setting. PCRE2_EXTRA_ASCII_DIGIT This option forces the POSIX character classes [:digit:] and [:xdigit:] - to match only ASCII digits, even when PCRE2_UCP is set. It can be + to match only ASCII digits, even when PCRE2_UCP is set. It can be changed within a pattern by means of the (?aT) option setting. PCRE2_EXTRA_ASCII_POSIX This option forces all the POSIX character classes, including [:digit:] - and [:xdigit:], to match only ASCII characters, even when PCRE2_UCP is - set. It can be changed within a pattern by means of the (?aP) option - setting, but note that this also sets PCRE2_EXTRA_ASCII_DIGIT in order + and [:xdigit:], to match only ASCII characters, even when PCRE2_UCP is + set. It can be changed within a pattern by means of the (?aP) option + setting, but note that this also sets PCRE2_EXTRA_ASCII_DIGIT in order to ensure that (?-aP) unsets all ASCII restrictions for POSIX classes. PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL - This is a dangerous option. Use with care. By default, an unrecognized - escape such as \j or a malformed one such as \x{2z} causes a compile- + This is a dangerous option. Use with care. By default, an unrecognized + escape such as \j or a malformed one such as \x{2z} causes a compile- time error when detected by pcre2_compile(). Perl is somewhat inconsis- - tent in handling such items: for example, \j is treated as a literal - "j", and non-hexadecimal digits in \x{} are just ignored, though warn- - ings are given in both cases if Perl's warning switch is enabled. 
How- - ever, a malformed octal number after \o{ always causes an error in + tent in handling such items: for example, \j is treated as a literal + "j", and non-hexadecimal digits in \x{} are just ignored, though warn- + ings are given in both cases if Perl's warning switch is enabled. How- + ever, a malformed octal number after \o{ always causes an error in Perl. - If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to - pcre2_compile(), all unrecognized or malformed escape sequences are - treated as single-character escapes. For example, \j is a literal "j" - and \x{2z} is treated as the literal string "x{2z}". Setting this op- + If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to + pcre2_compile(), all unrecognized or malformed escape sequences are + treated as single-character escapes. For example, \j is a literal "j" + and \x{2z} is treated as the literal string "x{2z}". Setting this op- tion means that typos in patterns may go undetected and have unexpected - results. Also note that a sequence such as [\N{] is interpreted as a - malformed attempt at [\N{...}] and so is treated as [N{] whereas [\N] + results. Also note that a sequence such as [\N{] is interpreted as a + malformed attempt at [\N{...}] and so is treated as [N{] whereas [\N] gives an error because an unqualified \N is a valid escape sequence but - is not supported in a character class. To reiterate: this is a danger- + is not supported in a character class. To reiterate: this is a danger- ous option. Use with great care. PCRE2_EXTRA_CASELESS_RESTRICT - When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows + When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode rules, which allow for more than two cases per character. There are two case-equivalent character sets that contain both ASCII and non- ASCII characters. 
The ASCII letter S is case-equivalent to U+017f (long - S) and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). - This option disables recognition of case-equivalences that cross the + S) and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). + This option disables recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a caseless match, both characters must ei- - ther be ASCII or non-ASCII. The option can be changed with a pattern by - the (?r) option setting. + ther be ASCII or non-ASCII. The option can be changed within a pattern + by the (*CASELESS_RESTRICT) or (?r) option settings. PCRE2_EXTRA_ESCAPED_CR_IS_LF @@ -2062,6 +2187,36 @@ COMPILING A PATTERN end. The option may be used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is also set. + PCRE2_EXTRA_NO_BS0 + + If this option is set (note that its final character is the digit 0) it + locks out the use of the sequence \0 unless at least one more octal + digit follows. + + PCRE2_EXTRA_PYTHON_OCTAL + + If this option is set, PCRE2 follows Python's rules for interpreting + octal escape sequences. The rules for handling sequences such as \14, + which could be an octal number or a back reference are different. De- + tails are given in the pcre2pattern documentation. + + PCRE2_EXTRA_NEVER_CALLOUT + + If this option is set, PCRE2 treats callouts in the pattern as a syntax + error, returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if + the application knows that a callout will not be provided to + pcre2_match(), so that callouts in the pattern are not silently ig- + nored. + + PCRE2_EXTRA_TURKISH_CASING + + This option alters case-equivalence of the 'i' letters to follow the + alphabet used by Turkish and Azeri languages. The option can be changed + within a pattern by the (*TURKISH_CASING) start-of-pattern setting. Ei- + ther the UTF or UCP options must be set. In the 8-bit library, UTF must + be set. 
This option cannot be combined with PCRE2_EXTRA_CASELESS_RE- + STRICT. + JUST-IN-TIME (JIT) COMPILATION @@ -2255,6 +2410,7 @@ INFORMATION ABOUT A COMPILED PATTERN PCRE2_DOTALL is in force for .* Neither (*PRUNE) nor (*SKIP) appears in the pattern PCRE2_NO_DOTSTAR_ANCHOR is not set + Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the options returned for PCRE2_INFO_ALLOPTIONS. @@ -3520,9 +3676,9 @@ CREATING A NEW STRING WITH SUBSTITUTIONS ORY immediately. If this option is set, however, pcre2_substitute() continues to go through the motions of matching and substituting (with- out, of course, writing anything) in order to compute the size of - buffer that is needed. This value is passed back via the outlengthptr - variable, with the result of the function still being PCRE2_ER- - ROR_NOMEMORY. + buffer that is needed, which will include the extra space for the ter- + minating NUL. This value is passed back via the outlengthptr variable, + with the result of the function still being PCRE2_ERROR_NOMEMORY. Passing a buffer size of zero is a permitted way of finding out how much memory is needed for given substitution. However, this does mean @@ -3541,24 +3697,32 @@ CREATING A NEW STRING WITH SUBSTITUTIONS cape character that can specify the insertion of characters from cap- ture groups and names from (*MARK) or other control verbs in the pat- tern. Dollar is the only escape character (backslash is treated as lit- - eral). The following forms are always recognized: + eral). 
The following forms are recognized: $$ insert a dollar character - $<n> or ${<n>} insert the contents of group <n> + $n or ${n} insert the contents of group n + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string $*MARK or ${*MARK} insert a control verb name - Either a group number or a group name can be given for <n>. Curly - brackets are required only if the following character would be inter- - preted as part of the number or name. The number may be zero to include - the entire matched string. For example, if the pattern a(b)c is - matched with "=abc=" and the replacement string "+$1$0$1+", the result - is "=+babcb+=". + Either a group number or a group name can be given for n, for example + $2 or $NAME. Curly brackets are required only if the following charac- + ter would be interpreted as part of the number or name. The number may + be zero to include the entire matched string. For example, if the pat- + tern a(b)c is matched with "=abc=" and the replacement string + "+$1$0$1+", the result is "=+babcb+=". + + The JavaScript form $<name>, where the angle brackets are part of the + syntax, is also recognized for group names, but not for group numbers + or *MARK. - $*MARK inserts the name from the last encountered backtracking control - verb on the matching path that has a name. (*MARK) must always include - a name, but the other verbs need not. For example, in the case of + $*MARK inserts the name from the last encountered backtracking control + verb on the matching path that has a name. (*MARK) must always include + a name, but the other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B) - the relevant name is "B".
This facility can be used to perform simple simultaneous substitutions, as this pcre2test example shows: /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} @@ -3566,15 +3730,15 @@ CREATING A NEW STRING WITH SUBSTITUTIONS 2: pear orange PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject - string, replacing every matching substring. If this option is not set, - only the first matching substring is replaced. The search for matches - takes place in the original subject string (that is, previous replace- - ments do not affect it). Iteration is implemented by advancing the - startoffset value for each search, which is always passed the entire + string, replacing every matching substring. If this option is not set, + only the first matching substring is replaced. The search for matches + takes place in the original subject string (that is, previous replace- + ments do not affect it). Iteration is implemented by advancing the + startoffset value for each search, which is always passed the entire subject string. If an offset limit is set in the match context, search- ing stops when that limit is reached. - You can restrict the effect of a global substitution to a portion of + You can restrict the effect of a global substitution to a portion of the subject string by setting either or both of startoffset and an off- set limit. Here is a pcre2test example: @@ -3582,73 +3746,95 @@ CREATING A NEW STRING WITH SUBSTITUTIONS ABC ABC ABC ABC\=offset=3,offset_limit=12 2: ABC A!C A!C ABC - When continuing with global substitutions after matching a substring + When continuing with global substitutions after matching a substring with zero length, an attempt to find a non-empty match at the same off- set is performed. If this is not successful, the offset is advanced by one character except when CRLF is a valid newline sequence and the next - two characters are CR, LF. In this case, the offset is advanced by two + two characters are CR, LF. 
In this case, the offset is advanced by two characters. PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do not appear in the pattern to be treated as unset groups. This option - should be used with care, because it means that a typo in a group name + should be used with care, because it means that a typo in a group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING error. PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including un- - known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated - as empty strings when inserted as described above. If this option is + known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated + as empty strings when inserted as described above. If this option is not set, an attempt to insert an unset group causes the PCRE2_ERROR_UN- - SET error. This option does not influence the extended substitution + SET error. This option does not influence the extended substitution syntax described below. - PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the - replacement string. Without this option, only the dollar character is - special, and only the group insertion forms listed above are valid. - When PCRE2_SUBSTITUTE_EXTENDED is set, two things change: + PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the + replacement string. Without this option, only the dollar character is + special, and only the group insertion forms listed above are valid. + When PCRE2_SUBSTITUTE_EXTENDED is set, several things change: + + Firstly, backslash in a replacement string is interpreted as an escape + character. The usual forms such as \x{ddd} can be used to specify par- + ticular character codes, and backslash followed by any non-alphanumeric + character quotes that character. Extended quoting can be coded using + \Q...\E, exactly as in pattern strings. The escapes \b and \v are in- + terpreted as the characters backspace and vertical tab, respectively. 
- Firstly, backslash in a replacement string is interpreted as an escape - character. The usual forms such as \n or \x{ddd} can be used to specify - particular character codes, and backslash followed by any non-alphanu- - meric character quotes that character. Extended quoting can be coded - using \Q...\E, exactly as in pattern strings. + The interpretation of backslash followed by one or more digits is the + same as in a pattern, which in Perl has some ambiguities. Details are + given in the pcre2pattern page. + + The Python form \g<n>, where the angle brackets are part of the syntax + and n is either a group name or number, is recognized as an alternative + way of inserting the contents of a group, for example \g<3>. There are also four escape sequences for forcing the case of inserted - letters. The insertion mechanism has three states: no case forcing, - force upper case, and force lower case. The escape sequences change the - current state: \U and \L change to upper or lower case forcing, respec- - tively, and \E (when not terminating a \Q quoted sequence) reverts to - no case forcing. The sequences \u and \l force the next character (if - it is a letter) to upper or lower case, respectively, and then the - state automatically reverts to no case forcing. Case forcing applies to - all inserted characters, including those from capture groups and let- - ters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP - was set when the pattern was compiled, Unicode properties are used for - case forcing characters whose code points are greater than 127. + letters. Case forcing applies to all inserted characters, including + those from capture groups and letters within \Q...\E quoted sequences. + The insertion mechanism has three states: no case forcing, force upper + case, and force lower case.
The escape sequences change the current + state: \U and \L change to upper or lower case forcing, respectively, + and \E (when not terminating a \Q quoted sequence) reverts to no case + forcing. The sequences \u and \l force the next character (if it is a + letter) to upper or lower case, respectively, and then the state auto- + matically reverts to no case forcing. + + However, if \u is immediately followed by \L or \l is immediately fol- + lowed by \U, the next character's case is forced by the first escape + sequence, and subsequent characters by the second. This provides a "ti- + tle casing" facility that can be applied to group captures. For exam- + ple, if group 1 has captured "heLLo", the replacement string "\u\L$1" + becomes "Hello". + + If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, + Unicode properties are used for case forcing characters whose code + points are greater than 127. However, only simple case folding, as de- + termined by the Unicode file CaseFolding.txt, is supported. PCRE2 does + not support language-specific special casing rules such as using dif- + ferent lower case Greek sigmas in the middle and ends of words (as de- + fined in the Unicode file SpecialCasing.txt). Note that case forcing sequences such as \U...\E do not nest. For exam- ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX- TRA_ALT_BSUX options do not apply to replacement strings. - The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more + The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more flexibility to capture group substitution. The syntax is similar to that used by Bash: - ${<n>:-<string>} - ${<n>:+<string1>:<string2>} + ${n:-string} + ${n:+string1:string2} - As before, <n> may be a group number or a name. The first form speci- - fies a default value. If group <n> is set, its value is inserted; if - not, <string> is expanded and the result inserted.
The second form - specifies strings that are expanded and inserted when group <n> is set - or unset, respectively. The first form is just a convenient shorthand - for + As in the simple case, n may be a group number or a name. The first + form specifies a default value. If group n is set, its value is in- + serted; if not, the string is expanded and the result inserted. The + second form specifies strings that are expanded and inserted when group + n is set or unset, respectively. The first form is just a convenient + shorthand for - ${<n>:+${<n>}:<string>} + ${n:+${n}:string} - Backslash can be used to escape colons and closing curly brackets in - the replacement strings. A change of the case forcing state within a - replacement string remains in force afterwards, as shown in this + Backslash can be used to escape colons and closing curly brackets in + the replacement strings. A change of the case forcing state within a + replacement string remains in force afterwards, as shown in this pcre2test example: /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo @@ -3657,8 +3843,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS somebody 1: HELLO - The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended - substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un- + The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended + substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un- known groups in the extended syntax forms to be treated as unset. If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, @@ -3667,39 +3853,39 @@ CREATING A NEW STRING WITH SUBSTITUTIONS Substitution errors - In the event of an error, pcre2_substitute() returns a negative error - code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors + In the event of an error, pcre2_substitute() returns a negative error + code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from pcre2_match() are passed straight back.
PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser- tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ- - ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) - when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN- + ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) + when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN- SET_EMPTY is not set. - PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big + PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size - of buffer that is needed is returned via outlengthptr. Note that this + of buffer that is needed is returned via outlengthptr. Note that this does not happen by default. PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the - match_data argument is NULL or if the subject or replacement arguments - are NULL. For backward compatibility reasons an exception is made for + match_data argument is NULL or if the subject or replacement arguments + are NULL. For backward compatibility reasons an exception is made for the replacement argument if the rlength argument is also 0. 
- PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in - the replacement string, with more particular errors being PCRE2_ER- + PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in + the replacement string, with more particular errors being PCRE2_ER- ROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE - (closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax - error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN + (closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax + error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started or the match started earlier - than the current position in the subject, which can happen if \K is + than the current position in the subject, which can happen if \K is used in an assertion). As for all PCRE2 errors, a text message that describes the error can be - obtained by calling the pcre2_get_error_message() function (see "Ob- + obtained by calling the pcre2_get_error_message() function (see "Ob- taining a textual error message" above). Substitution callouts @@ -3708,12 +3894,20 @@ CREATING A NEW STRING WITH SUBSTITUTIONS int (*callout_function)(pcre2_substitute_callout_block *, void *), void *callout_data); - The pcre2_set_substitution_callout() function can be used to specify a - callout function for pcre2_substitute(). This information is passed in + The pcre2_set_substitute_callout() function can be used to specify a + callout function for pcre2_substitute(). This information is passed in a match context. The callout function is called after each substitution - has been processed, but it can cause the replacement not to happen. The - callout function is not called for simulated substitutions that happen - as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. + has been processed, but it can cause the replacement not to happen.
+ + The callout function is not called for simulated substitutions that + happen as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In + this mode, when substitution processing exceeds the buffer space pro- + vided by the caller, processing continues by counting code units. The + simulation is unable to populate the callout block, and so the simula- + tion is pessimistic about the required buffer size. Whichever is larger + of accepted or rejected substitution is reported as the required size. + Therefore, the returned buffer length may be an overestimate (without a + substitution callout, it is normally an exact measurement). The first argument of the callout function is a pointer to a substitute callout block structure, which contains the following fields, not nec- @@ -3757,62 +3951,149 @@ CREATING A NEW STRING WITH SUBSTITUTIONS to the output and the call to pcre2_substitute() exits, returning the number of matches so far. + Substitution case callouts + + int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); + + The pcre2_set_substitute_case_callout() function can be used to spec- + ify a callout function for pcre2_substitute() to use when performing + case transformations. This does not affect any case insensitivity be- + haviour when performing a match, but only the user-visible transforma- + tions performed when processing a substitution such as: + + pcre2_substitute(..., "\\U$1", ...) + + The default case transformations applied by PCRE2 are reasonably com- + plete, and, in UTF or UCP mode, perform the simple locale-invariant + case transformations as specified by Unicode. This is suitable for the + internal (invisible) case-equivalence procedures used during pattern + matching, but an application may wish to use more sophisticated locale- + aware processing for the user-visible substitution transformations.
+ + One example implementation of the callout_function using the ICU li- + brary would be: + + PCRE2_SIZE + icu_case_callout( + PCRE2_SPTR input, PCRE2_SIZE input_len, + PCRE2_UCHAR *output, PCRE2_SIZE output_cap, + int to_case, void *data_ptr) + { + UErrorCode err = U_ZERO_ERROR; + int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER + ? u_strToLower(output, output_cap, input, input_len, NULL, &err) + : to_case == PCRE2_SUBSTITUTE_CASE_UPPER + ? u_strToUpper(output, output_cap, input, input_len, NULL, &err) + : u_strToTitle(output, output_cap, input, input_len, &first_char_only, + NULL, &err); + if (U_FAILURE(err)) return (~(PCRE2_SIZE)0); + return r; + } + + The first and second arguments of the case callout function are the + Unicode string to transform. + + The third and fourth arguments are the output buffer and its capacity. + + The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER, + PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST. + PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed + to the callout to indicate that the case of the entire callout input + should be case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed + to indicate that only the first character or glyph should be trans- + formed to Unicode titlecase and the rest to Unicode lowercase (note + that titlecasing sometimes uses Unicode properties to titlecase each + word in a string; but PCRE2 is requesting that only the single leading + character is to be titlecased). + + The sixth argument is the callout_data supplied to pcre2_set_substi- + tute_case_callout(). + + The resulting string in the destination buffer may be larger or smaller + than the input, if the casing rules merge or split characters. The re- + turn value is the length required for the output string. If a buffer of + sufficient size was provided to the callout, then the result must be + written to the buffer and the number of code units returned. 
If the re- + sult does not fit in the provided buffer, then the required capacity + must be returned and PCRE2 will not make use of the output buffer. + PCRE2 provides input and output buffers which overlap, so the callout + must support this by suitable internal buffering. + + Alternatively, if the callout wishes to indicate an error, then it may + return (~(PCRE2_SIZE)0). In this case pcre2_substitute() will immedi- + ately fail with error PCRE2_ERROR_REPLACECASE. + + When a case callout is combined with the PCRE2_SUBSTITUTE_OVER- + FLOW_LENGTH option, there are situations when pcre2_substitute() will + return an underestimate of the required buffer size. If you call + pcre2_substitute() once with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the + input buffer is too small for the replacement string to be constructed, + then instead of calling the case callout, pcre2_substitute() will make + an estimate of the required buffer size. The second call should also + pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that second call is not + guaranteed to succeed either, if the case callout requires more buffer + space than expected. The caller must make repeated attempts in a loop. + DUPLICATE CAPTURE GROUP NAMES int pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); - When a pattern is compiled with the PCRE2_DUPNAMES option, names for - capture groups are not required to be unique. Duplicate names are al- - ways allowed for groups with the same number, created by using the (?| + When a pattern is compiled with the PCRE2_DUPNAMES option, names for + capture groups are not required to be unique. Duplicate names are al- + ways allowed for groups with the same number, created by using the (?| feature. Indeed, if such groups are named, they are required to use the same names. - Normally, patterns that use duplicate names are such that in any one - match, only one of each set of identically-named groups participates. 
+ Normally, patterns that use duplicate names are such that in any one + match, only one of each set of identically-named groups participates. An example is shown in the pcre2pattern documentation. - When duplicates are present, pcre2_substring_copy_byname() and - pcre2_substring_get_byname() return the first substring corresponding - to the given name that is set. Only if none are set is PCRE2_ERROR_UN- - SET is returned. The pcre2_substring_number_from_name() function re- - turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate + When duplicates are present, pcre2_substring_copy_byname() and + pcre2_substring_get_byname() return the first substring corresponding + to the given name that is set. Only if none are set is PCRE2_ERROR_UN- + SET returned. The pcre2_substring_number_from_name() function re- + turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names. - If you want to get full details of all captured substrings for a given - name, you must use the pcre2_substring_nametable_scan() function. The - first argument is the compiled pattern, and the second is the name. If - the third and fourth arguments are NULL, the function returns a group + If you want to get full details of all captured substrings for a given + name, you must use the pcre2_substring_nametable_scan() function. The + first argument is the compiled pattern, and the second is the name. If + the third and fourth arguments are NULL, the function returns a group number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise. When the third and fourth arguments are not NULL, they must be pointers - to variables that are updated by the function. After it has run, they + to variables that are updated by the function. After it has run, they point to the first and last entries in the name-to-number table for the - given name, and the function returns the length of each entry in code - units.
In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are + given name, and the function returns the length of each entry in code + units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name. The format of the name table is described above in the section entitled - Information about a pattern. Given all the relevant entries for the - name, you can extract each of their numbers, and hence the captured + Information about a pattern. Given all the relevant entries for the + name, you can extract each of their numbers, and hence the captured data. FINDING ALL POSSIBLE MATCHES AT ONE POSITION - The traditional matching function uses a similar algorithm to Perl, - which stops when it finds the first match at a given point in the sub- + The traditional matching function uses a similar algorithm to Perl, + which stops when it finds the first match at a given point in the sub- ject. If you want to find all possible matches, or the longest possible - match at a given position, consider using the alternative matching - function (see below) instead. If you cannot use the alternative func- + match at a given position, consider using the alternative matching + function (see below) instead. If you cannot use the alternative func- tion, you can kludge it up by making use of the callout facility, which is described in the pcre2callout documentation. What you have to do is to insert a callout right at the end of the pat- - tern. When your callout function is called, extract and save the cur- - rent matched substring. Then return 1, which forces pcre2_match() to - backtrack and try other alternatives. Ultimately, when it runs out of + tern. When your callout function is called, extract and save the cur- + rent matched substring. Then return 1, which forces pcre2_match() to + backtrack and try other alternatives. Ultimately, when it runs out of matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH. 
@@ -3824,27 +4105,27 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount); - The function pcre2_dfa_match() is called to match a subject string - against a compiled pattern, using a matching algorithm that scans the + The function pcre2_dfa_match() is called to match a subject string + against a compiled pattern, using a matching algorithm that scans the subject string just once (not counting lookaround assertions), and does - not backtrack (except when processing lookaround assertions). This has - different characteristics to the normal algorithm, and is not compati- - ble with Perl. Some of the features of PCRE2 patterns are not sup- + not backtrack (except when processing lookaround assertions). This has + different characteristics to the normal algorithm, and is not compati- + ble with Perl. Some of the features of PCRE2 patterns are not sup- ported. Nevertheless, there are times when this kind of matching can be - useful. For a discussion of the two matching algorithms, and a list of + useful. For a discussion of the two matching algorithms, and a list of features that pcre2_dfa_match() does not support, see the pcre2matching documentation. - The arguments for the pcre2_dfa_match() function are the same as for + The arguments for the pcre2_dfa_match() function are the same as for pcre2_match(), plus two extras. The ovector within the match data block is used in a different way, and this is described below. The other com- - mon arguments are used in the same way as for pcre2_match(), so their + mon arguments are used in the same way as for pcre2_match(), so their description is not repeated here. - The two additional arguments provide workspace for the function. The - workspace vector should contain at least 20 elements. It is used for - keeping track of multiple paths through the pattern tree. 
More work- - space is needed for patterns and subjects where there are a lot of po- + The two additional arguments provide workspace for the function. The + workspace vector should contain at least 20 elements. It is used for + keeping track of multiple paths through the pattern tree. More work- + space is needed for patterns and subjects where there are a lot of po- tential matches. Here is an example of a simple call to pcre2_dfa_match(): @@ -3864,45 +4145,45 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION Option bits for pcre2_dfa_match() - The unused bits of the options argument for pcre2_dfa_match() must be - zero. The only bits that may be set are PCRE2_ANCHORED, - PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NO- + The unused bits of the options argument for pcre2_dfa_match() must be + zero. The only bits that may be set are PCRE2_ANCHORED, + PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NO- TEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, - PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and - PCRE2_DFA_RESTART. All but the last four of these are exactly the same + PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and + PCRE2_DFA_RESTART. All but the last four of these are exactly the same as for pcre2_match(), so their description is not repeated here. PCRE2_PARTIAL_HARD PCRE2_PARTIAL_SOFT - These have the same general effect as they do for pcre2_match(), but - the details are slightly different. When PCRE2_PARTIAL_HARD is set for - pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the + These have the same general effect as they do for pcre2_match(), but + the details are slightly different. When PCRE2_PARTIAL_HARD is set for + pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the subject is reached and there is still at least one matching possibility that requires additional characters. 
This happens even if some complete - matches have already been found. When PCRE2_PARTIAL_SOFT is set, the - return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL - if the end of the subject is reached, there have been no complete + matches have already been found. When PCRE2_PARTIAL_SOFT is set, the + return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL + if the end of the subject is reached, there have been no complete matches, but there is still at least one matching possibility. The por- - tion of the string that was inspected when the longest partial match + tion of the string that was inspected when the longest partial match was found is set as the first matching string in both cases. There is a - more detailed discussion of partial and multi-segment matching, with + more detailed discussion of partial and multi-segment matching, with examples, in the pcre2partial documentation. PCRE2_DFA_SHORTEST - Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to + Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to stop as soon as it has found one match. Because of the way the alterna- - tive algorithm works, this is necessarily the shortest possible match + tive algorithm works, this is necessarily the shortest possible match at the first possible matching point in the subject string. PCRE2_DFA_RESTART - When pcre2_dfa_match() returns a partial match, it is possible to call + When pcre2_dfa_match() returns a partial match, it is possible to call it again, with additional subject characters, and have it continue with the same match. The PCRE2_DFA_RESTART option requests this action; when - it is set, the workspace and wscount options must reference the same - vector as before because data about the match so far is left in them + it is set, the workspace and wscount options must reference the same + vector as before because data about the match so far is left in them after a partial match. 
There is more discussion of this facility in the pcre2partial documentation. @@ -3910,8 +4191,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION When pcre2_dfa_match() succeeds, it may have matched more than one sub- string in the subject. Note, however, that all the matches from one run - of the function start at the same point in the subject. The shorter - matches are all initial substrings of the longer matches. For example, + of the function start at the same point in the subject. The shorter + matches are all initial substrings of the longer matches. For example, if the pattern <.*> @@ -3926,80 +4207,80 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION - On success, the yield of the function is a number greater than zero, - which is the number of matched substrings. The offsets of the sub- - strings are returned in the ovector, and can be extracted by number in - the same way as for pcre2_match(), but the numbers bear no relation to - any capture groups that may exist in the pattern, because DFA matching + On success, the yield of the function is a number greater than zero, + which is the number of matched substrings. The offsets of the sub- + strings are returned in the ovector, and can be extracted by number in + the same way as for pcre2_match(), but the numbers bear no relation to + any capture groups that may exist in the pattern, because DFA matching does not support capturing. - Calls to the convenience functions that extract substrings by name re- + Calls to the convenience functions that extract substrings by name re- turn the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used af- - ter a DFA match. The convenience functions that extract substrings by + ter a DFA match. The convenience functions that extract substrings by number never return PCRE2_ERROR_NOSUBSTRING. - The matched strings are stored in the ovector in reverse order of - length; that is, the longest matching string is first. 
If there were - too many matches to fit into the ovector, the yield of the function is + The matched strings are stored in the ovector in reverse order of + length; that is, the longest matching string is first. If there were + too many matches to fit into the ovector, the yield of the function is zero, and the vector is filled with the longest matches. - NOTE: PCRE2's "auto-possessification" optimization usually applies to - character repeats at the end of a pattern (as well as internally). For - example, the pattern "a\d+" is compiled as if it were "a\d++". For DFA - matching, this means that only one possible match is found. If you re- + NOTE: PCRE2's "auto-possessification" optimization usually applies to + character repeats at the end of a pattern (as well as internally). For + example, the pattern "a\d+" is compiled as if it were "a\d++". For DFA + matching, this means that only one possible match is found. If you re- ally do want multiple matches in such cases, either use an ungreedy re- - peat such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when com- + peat such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when com- piling. Error returns from pcre2_dfa_match() The pcre2_dfa_match() function returns a negative number when it fails. - Many of the errors are the same as for pcre2_match(), as described + Many of the errors are the same as for pcre2_match(), as described above. There are in addition the following errors that are specific to pcre2_dfa_match(): PCRE2_ERROR_DFA_UITEM - This return is given if pcre2_dfa_match() encounters an item in the - pattern that it does not support, for instance, the use of \C in a UTF + This return is given if pcre2_dfa_match() encounters an item in the + pattern that it does not support, for instance, the use of \C in a UTF mode or a backreference. 
PCRE2_ERROR_DFA_UCOND - This return is given if pcre2_dfa_match() encounters a condition item + This return is given if pcre2_dfa_match() encounters a condition item that uses a backreference for the condition, or a test for recursion in a specific capture group. These are not supported. PCRE2_ERROR_DFA_UINVALID_UTF - This return is given if pcre2_dfa_match() is called for a pattern that - was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for + This return is given if pcre2_dfa_match() is called for a pattern that + was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for DFA matching. PCRE2_ERROR_DFA_WSSIZE - This return is given if pcre2_dfa_match() runs out of space in the + This return is given if pcre2_dfa_match() runs out of space in the workspace vector. PCRE2_ERROR_DFA_RECURSE When a recursion or subroutine call is processed, the matching function - calls itself recursively, using private memory for the ovector and - workspace. This error is given if the internal ovector is not large - enough. This should be extremely rare, as a vector of size 1000 is + calls itself recursively, using private memory for the ovector and + workspace. This error is given if the internal ovector is not large + enough. This should be extremely rare, as a vector of size 1000 is used. PCRE2_ERROR_DFA_BADRESTART - When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option, - some plausibility checks are made on the contents of the workspace, - which should contain data about the previous partial match. If any of + When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option, + some plausibility checks are made on the contents of the workspace, + which should contain data about the previous partial match. If any of these checks fail, this error is given. 
SEE ALSO - pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3), + pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3), pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3). @@ -4012,15 +4293,14 @@ AUTHOR REVISION - Last updated: 24 April 2024 + Last updated: 26 December 2024 Copyright (c) 1997-2024 University of Cambridge. -PCRE2 10.44 24 April 2024 PCRE2API(3) +PCRE2 10.45-RC1 26 December 2024 PCRE2API(3) ------------------------------------------------------------------------------ - PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3) @@ -4639,15 +4919,14 @@ AUTHOR REVISION - Last updated: 15 April 2024 + Last updated: 16 April 2024 Copyright (c) 1997-2024 University of Cambridge. -PCRE2 10.44 15 April 2024 PCRE2BUILD(3) +PCRE2 10.45-RC1 16 April 2024 PCRE2BUILD(3) ------------------------------------------------------------------------------ - PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3) @@ -5077,11 +5356,10 @@ REVISION Copyright (c) 1997-2024 University of Cambridge. -PCRE2 10.43 19 January 2024 PCRE2CALLOUT(3) +PCRE2 10.45-RC1 19 January 2024 PCRE2CALLOUT(3) ------------------------------------------------------------------------------ - PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3) @@ -5140,7 +5418,7 @@ DIFFERENCES BETWEEN PCRE2 AND PERL 7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is built with Unicode support (the default). The properties that can be tested with \p and \P are limited to the general category properties - such as Lu and Nd, the derived properties Any and LC (synonym L&), + such as Lu and Nd, the derived properties Any and Lc (synonym L&), script names such as Greek or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and Perl support the Cs (surrogate) prop- erty, but in PCRE2 its use is limited. 
See the pcre2pattern documenta- @@ -5167,118 +5445,128 @@ DIFFERENCES BETWEEN PCRE2 AND PERL \Q\\E \ \\E The \Q...\E sequence is recognized both inside and outside character - classes by both PCRE2 and Perl. - - 9. Fairly obviously, PCRE2 does not support the (?{code}) and + classes by both PCRE2 and Perl. Another difference from Perl is that + any appearance of \Q or \E inside what might otherwise be a quantifier + causes PCRE2 not to recognize the sequence as a quantifier. Perl recog- + nizes a quantifier if (redundantly) either of the numbers is inside + \Q...\E, but not if the separating comma is. When not recognized as a + quantifier a sequence such as {\Q1\E,2} is treated as the literal + string "{1,2}". + + 9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) constructions. However, PCRE2 does have a "callout" feature, which allows an external function to be called during pattern matching. See the pcre2callout documentation for details. - 10. Subroutine calls (whether recursive or not) were treated as atomic - groups up to PCRE2 release 10.23, but from release 10.30 this changed, + 10. Subroutine calls (whether recursive or not) were treated as atomic + groups up to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking into subroutine calls is now supported, as in Perl. - 11. In PCRE2, if any of the backtracking control verbs are used in a - group that is called as a subroutine (whether or not recursively), - their effect is confined to that group; it does not extend to the sur- - rounding pattern. This is not always the case in Perl. In particular, - if (*THEN) is present in a group that is called as a subroutine, its + 11. In PCRE2, if any of the backtracking control verbs are used in a + group that is called as a subroutine (whether or not recursively), + their effect is confined to that group; it does not extend to the sur- + rounding pattern. This is not always the case in Perl. 
In particular, + if (*THEN) is present in a group that is called as a subroutine, its action is limited to that group, even if the group does not contain any - | characters. Note that such groups are processed as anchored at the - point where they are tested. - - 12. If a pattern contains more than one backtracking control verb, the - first one that is backtracked onto acts. For example, in the pattern - A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure + | characters. Note that such groups are processed as anchored at the + point where they are tested. PCRE2 also confines all control verbs + within atomic assertions, again including (*THEN) in assertions with + only one branch. + + 12. If a pattern contains more than one backtracking control verb, the + first one that is backtracked onto acts. For example, in the pattern + A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE2, but there are cases where it differs. - 13. There are some differences that are concerned with the settings of - captured strings when part of a pattern is repeated. For example, - matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 un- + 13. There are some differences that are concerned with the settings of + captured strings when part of a pattern is repeated. For example, + matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 un- set, but in PCRE2 it is set to "b". - 14. PCRE2's handling of duplicate capture group numbers and names is - not as general as Perl's. This is a consequence of the fact the PCRE2 - works internally just with numbers, using an external table to trans- - late between numbers and names. In particular, a pattern such as - (?|(?A)|(?B)), where the two capture groups have the same number - but different names, is not supported, and causes an error at compile + 14. 
PCRE2's handling of duplicate capture group numbers and names is + not as general as Perl's. This is a consequence of the fact that PCRE2 + works internally just with numbers, using an external table to trans- + late between numbers and names. In particular, a pattern such as + (?|(?<a>A)|(?<b>B)), where the two capture groups have the same number + but different names, is not supported, and causes an error at compile time. If it were allowed, it would not be possible to distinguish which - group matched, because both names map to capture group number 1. To + group matched, because both names map to capture group number 1. To avoid this confusing situation, an error is given at compile time. 15. Perl used to recognize comments in some places that PCRE2 does not, - for example, between the ( and ? at the start of a group. If the /x - modifier is set, Perl allowed white space between ( and ? though the - latest Perls give an error (for a while it was just deprecated). There + for example, between the ( and ? at the start of a group. If the /x + modifier is set, Perl allowed white space between ( and ? though the + latest Perls give an error (for a while it was just deprecated). There may still be some cases where Perl behaves differently. - 16. Perl, when in warning mode, gives warnings for character classes - such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter- + 16. Perl, when in warning mode, gives warnings for character classes + such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter- als. PCRE2 has no warning features, so it gives an error in these cases because they are almost certainly user mistakes. - 17. In PCRE2, the upper/lower case character properties Lu and Ll are - not affected when case-independent matching is specified. For example, - \p{Lu} always matches an upper case letter.
I think Perl has changed in - this respect; in the release at the time of writing (5.38), \p{Lu} and - \p{Ll} match all letters, regardless of case, when case independence is - specified. + 17. In PCRE2, until release 10.45, the upper/lower case character prop- + erties Lu and Ll were not affected when case-independent matching was + specified. Perl has changed in this respect, and PCRE2 has now changed + to match. When caseless matching is in force, Lu, Ll, and Lt (title + case) are all treated as Lc (cased letter). 18. From release 5.32.0, Perl locks out the use of \K in lookaround as- - sertions. From release 10.38 PCRE2 does the same by default. However, - there is an option for re-enabling the previous behaviour. When this - option is set, \K is acted on when it occurs in positive assertions, + sertions. From release 10.38 PCRE2 does the same by default. However, + there is an option for re-enabling the previous behaviour. When this + option is set, \K is acted on when it occurs in positive assertions, but is ignored in negative assertions. - 19. PCRE2 provides some extensions to the Perl regular expression fa- - cilities. Perl 5.10 included new features that were not in earlier - versions of Perl, some of which (such as named parentheses) were in + 19. PCRE2 provides some extensions to the Perl regular expression fa- + cilities. Perl 5.10 included new features that were not in earlier + versions of Perl, some of which (such as named parentheses) were in PCRE2 for some time before. This list is with respect to Perl 5.38: - (a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the + (a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ meta-character matches only at the very end of the string. - (b) A backslash followed by a letter with no special meaning is + (b) A backslash followed by a letter with no special meaning is faulted. (Perl can be made to issue a warning.) 
- (c) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti- + (c) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti- fiers is inverted, that is, by default they are not greedy, but if fol- lowed by a question mark they are. - (d) PCRE2_ANCHORED can be used at matching time to force a pattern to + (d) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried only at the first matching position in the subject string. - (e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and + (e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART options have no Perl equivalents. - (f) The \R escape sequence can be restricted to match only CR, LF, or + (f) The \R escape sequence can be restricted to match only CR, LF, or CRLF by the PCRE2_BSR_ANYCRLF option. - (g) The callout facility is PCRE2-specific. Perl supports codeblocks + (g) The callout facility is PCRE2-specific. Perl supports codeblocks and variable interpolation, but not general hooks on every match. (h) The partial matching facility is PCRE2-specific. - (i) The alternative matching function (pcre2_dfa_match() matches in a + (i) The alternative matching function (pcre2_dfa_match()) matches in a different way and is not Perl-compatible. - (j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) - at the start of a pattern. These set overall options that cannot be + (j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) + at the start of a pattern. These set overall options that cannot be changed within the pattern. - (k) PCRE2 supports non-atomic positive lookaround assertions. This is + (k) PCRE2 supports non-atomic positive lookaround assertions. This is an extension to the lookaround facilities. The default, Perl-compatible lookarounds are atomic.
- (l) There are three syntactical items in patterns that can refer to a - capturing group by number: back references such as \g{2}, subroutine - calls such as (?3), and condition references such as (?(4)...). PCRE2 - supports relative group numbers such as +2 and -4 in all three cases. - Perl supports both plus and minus for subroutine calls, but only minus + (l) There are three syntactical items in patterns that can refer to a + capturing group by number: back references such as \g{2}, subroutine + calls such as (?3), and condition references such as (?(4)...). PCRE2 + supports relative group numbers such as +2 and -4 in all three cases. + Perl supports both plus and minus for subroutine calls, but only minus for back references, and no relative numbering at all for conditions. + (m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 ex- + tension that is not available in Perl. + 20. Perl has different limits than PCRE2. See the pcre2limits documenta- tion for details. Perl went with 5.10 from recursion to iteration keep- ing the intermediate matches on the heap, which is ~10% slower but does @@ -5297,6 +5585,17 @@ DIFFERENCES BETWEEN PCRE2 AND PERL ple is /(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number of repeated "abcd" substrings at the end of the subject. + 23. Both PCRE2 and Perl error when \x{ escapes are invalid, but Perl + tries to recover and prints a warning if the problem was that an in- + valid hexadecimal digit was found; since PCRE2 doesn't have warnings, it + returns an error instead. Additionally, Perl accepts \x{} and gener- + ates NUL unlike PCRE2. + + 24. From release 10.45, PCRE2 gives an error if \x is not followed by a + hexadecimal digit or a curly bracket. It used to interpret this as the + NUL character. Perl still generates NUL, but warns when in warning mode + in most cases. + AUTHOR @@ -5307,15 +5606,14 @@ AUTHOR REVISION - Last updated: 30 November 2023 - Copyright (c) 1997-2023 University of Cambridge.
+ Last updated: 02 October 2024 + Copyright (c) 1997-2024 University of Cambridge. -PCRE2 10.43 30 November 2023 PCRE2COMPAT(3) +PCRE2 10.45-RC1 02 October 2024 PCRE2COMPAT(3) ------------------------------------------------------------------------------ - PCRE2JIT(3) Library Functions Manual PCRE2JIT(3) @@ -5359,146 +5657,155 @@ AVAILABILITY OF JIT SUPPORT If --enable-jit is set on an unsupported platform, compilation fails. - A client program can tell if JIT support is available by calling + A client program can tell if JIT support has been compiled by calling pcre2_config() with the PCRE2_CONFIG_JIT option. The result is one if PCRE2 was built with JIT support, and zero otherwise. However, having the JIT code available does not guarantee that it will be used for any particular match. One reason for this is that there are a number of op- tions and pattern items that are not supported by JIT (see below). An- - other reason is that in some environments JIT is unable to get memory - in which to build its compiled code. The only guarantee from pcre2_con- - fig() is that if it returns zero, JIT will definitely not be used. - - A simple program does not need to check availability in order to use - JIT when possible. The API is implemented in a way that falls back to - the interpretive code if JIT is not available or cannot be used for a - given match. For programs that need the best possible performance, + other reason is that in some environments JIT is unable to get exe- + cutable memory in which to build its compiled code. The only guarantee + from pcre2_config() is that if it returns zero, JIT will definitely not + be used. + + As of release 10.45 there is a more informative way to test for JIT + support. If pcre2_jit_compile() is called with the single option + PCRE2_JIT_TEST_ALLOC it returns zero if JIT is available and has a + working allocator.
Otherwise it returns PCRE2_ERROR_NOMEMORY if JIT is + available but cannot allocate executable memory, or PCRE2_ERROR_JIT_UN- + SUPPORTED if JIT support is not compiled. The code argument is ignored, + so it can be a NULL value. + + A simple program does not need to check availability in order to use + JIT when possible. The API is implemented in a way that falls back to + the interpretive code if JIT is not available or cannot be used for a + given match. For programs that need the best possible performance, there is a "fast path" API that is JIT-specific. SIMPLE USE OF JIT - To make use of the JIT support in the simplest way, all you have to do - is to call pcre2_jit_compile() after successfully compiling a pattern + To make use of the JIT support in the simplest way, all you have to do + is to call pcre2_jit_compile() after successfully compiling a pattern with pcre2_compile(). This function has two arguments: the first is the - compiled pattern pointer that was returned by pcre2_compile(), and the - second is zero or more of the following option bits: PCRE2_JIT_COM- + compiled pattern pointer that was returned by pcre2_compile(), and the + second is zero or more of the following option bits: PCRE2_JIT_COM- PLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. - If JIT support is not available, a call to pcre2_jit_compile() does - nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled + If JIT support is not available, a call to pcre2_jit_compile() does + nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern is passed to the JIT compiler, which turns it into machine code that executes much faster than the normal interpretive code, but yields - exactly the same results. The returned value from pcre2_jit_compile() + exactly the same results. The returned value from pcre2_jit_compile() is zero on success, or a negative error code. 
- There is a limit to the size of pattern that JIT supports, imposed by - the size of machine stack that it uses. The exact rules are not docu- + There is a limit to the size of pattern that JIT supports, imposed by + the size of machine stack that it uses. The exact rules are not docu- mented because they may change at any time, in particular, when new op- - timizations are introduced. If a pattern is too big, a call to + timizations are introduced. If a pattern is too big, a call to pcre2_jit_compile() returns PCRE2_ERROR_NOMEMORY. - PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com- - plete matches. If you want to run partial matches using the PCRE2_PAR- - TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should - set one or both of the other options as well as, or instead of + PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com- + plete matches. If you want to run partial matches using the PCRE2_PAR- + TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should + set one or both of the other options as well as, or instead of PCRE2_JIT_COMPLETE. The JIT compiler generates different optimized code - for each of the three modes (normal, soft partial, hard partial). When - pcre2_match() is called, the appropriate code is run if it is avail- + for each of the three modes (normal, soft partial, hard partial). When + pcre2_match() is called, the appropriate code is run if it is avail- able. Otherwise, the pattern is matched using interpretive code. - You can call pcre2_jit_compile() multiple times for the same compiled - pattern. It does nothing if it has previously compiled code for any of - the option bits. For example, you can call it once with PCRE2_JIT_COM- - PLETE and (perhaps later, when you find you need partial matching) - again with PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it + You can call pcre2_jit_compile() multiple times for the same compiled + pattern. 
It does nothing if it has previously compiled code for any of + the option bits. For example, you can call it once with PCRE2_JIT_COM- + PLETE and (perhaps later, when you find you need partial matching) + again with PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore PCRE2_JIT_COMPLETE and just compile code for partial match- ing. If pcre2_jit_compile() is called with no option bits set, it imme- diately returns zero. This is an alternative way of testing whether JIT - is available. + support has been compiled. - At present, it is not possible to free JIT compiled code except when + At present, it is not possible to free JIT compiled code except when the entire compiled pattern is freed by calling pcre2_code_free(). - In some circumstances you may need to call additional functions. These - are described in the section entitled "Controlling the JIT stack" be- + In some circumstances you may need to call additional functions. These + are described in the section entitled "Controlling the JIT stack" be- low. There are some pcre2_match() options that are not supported by JIT, and - there are also some pattern items that JIT cannot handle. Details are - given below. In both cases, matching automatically falls back to the - interpretive code. If you want to know whether JIT was actually used - for a particular match, you should arrange for a JIT callback function - to be set up as described in the section entitled "Controlling the JIT - stack" below, even if you do not need to supply a non-default JIT + there are also some pattern items that JIT cannot handle. Details are + given below. In both cases, matching automatically falls back to the + interpretive code. If you want to know whether JIT was actually used + for a particular match, you should arrange for a JIT callback function + to be set up as described in the section entitled "Controlling the JIT + stack" below, even if you do not need to supply a non-default JIT stack. 
Such a callback function is called whenever JIT code is about to - be obeyed. If the match-time options are not right for JIT execution, + be obeyed. If the match-time options are not right for JIT execution, the callback function is not obeyed. - If the JIT compiler finds an unsupported item, no JIT data is gener- + If the JIT compiler finds an unsupported item, no JIT data is gener- ated. You can find out if JIT compilation was successful for a compiled pattern by calling pcre2_pattern_info() with the PCRE2_INFO_JITSIZE op- - tion. A non-zero result means that JIT compilation was successful. A + tion. A non-zero result means that JIT compilation was successful. A result of 0 means that JIT support is not available, or the pattern was - not processed by pcre2_jit_compile(), or the JIT compiler was not able - to handle the pattern. Successful JIT compilation does not, however, - guarantee the use of JIT at match time because there are some match + not processed by pcre2_jit_compile(), or the JIT compiler was not able + to handle the pattern. Successful JIT compilation does not, however, + guarantee the use of JIT at match time because there are some match time options that are not supported by JIT. MATCHING SUBJECTS CONTAINING INVALID UTF - When a pattern is compiled with the PCRE2_UTF option, subject strings - are normally expected to be a valid sequence of UTF code units. By de- - fault, this is checked at the start of matching and an error is gener- - ated if invalid UTF is detected. The PCRE2_NO_UTF_CHECK option can be + When a pattern is compiled with the PCRE2_UTF option, subject strings + are normally expected to be a valid sequence of UTF code units. By de- + fault, this is checked at the start of matching and an error is gener- + ated if invalid UTF is detected. The PCRE2_NO_UTF_CHECK option can be passed to pcre2_match() to skip the check (for improved performance) if - you are sure that a subject string is valid. 
If this option is used - with an invalid string, the result is undefined. The calling program + you are sure that a subject string is valid. If this option is used + with an invalid string, the result is undefined. The calling program may crash or loop or otherwise misbehave. - However, a way of running matches on strings that may contain invalid - UTF sequences is available. Calling pcre2_compile() with the - PCRE2_MATCH_INVALID_UTF option has two effects: it tells the inter- - preter in pcre2_match() to support invalid UTF, and, if pcre2_jit_com- - pile() is subsequently called, the compiled JIT code also supports in- - valid UTF. Details of how this support works, in both the JIT and the + However, a way of running matches on strings that may contain invalid + UTF sequences is available. Calling pcre2_compile() with the + PCRE2_MATCH_INVALID_UTF option has two effects: it tells the inter- + preter in pcre2_match() to support invalid UTF, and, if pcre2_jit_com- + pile() is subsequently called, the compiled JIT code also supports in- + valid UTF. Details of how this support works, in both the JIT and the interpretive cases, is given in the pcre2unicode documentation. There is also an obsolete option for pcre2_jit_compile() called PCRE2_JIT_INVALID_UTF, which currently exists only for backward compat- - ibility. It is superseded by the pcre2_compile() option + ibility. It is superseded by the pcre2_compile() option PCRE2_MATCH_INVALID_UTF and should no longer be used. It may be removed in future. UNSUPPORTED OPTIONS AND PATTERN ITEMS - The pcre2_match() options that are supported for JIT matching are + The pcre2_match() options that are supported for JIT matching are PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, - PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and - PCRE2_PARTIAL_SOFT. 
The PCRE2_ANCHORED and PCRE2_ENDANCHORED options + PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and + PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not supported at match time. - If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the + If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the use of JIT, forcing matching by the interpreter code. - The only unsupported pattern items are \C (match a single data unit) - when running in a UTF mode, and a callout immediately before an asser- + The only unsupported pattern items are \C (match a single data unit) + when running in a UTF mode, and a callout immediately before an asser- tion condition in a conditional group. RETURN VALUES FROM JIT MATCHING - When a pattern is matched using JIT, the return values are the same as - those given by the interpretive pcre2_match() code, with the addition - of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the - memory used for the JIT stack was insufficient. See "Controlling the + When a pattern is matched using JIT, the return values are the same as + those given by the interpretive pcre2_match() code, with the addition + of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the + memory used for the JIT stack was insufficient. See "Controlling the JIT stack" below for a discussion of JIT stack usage. - The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if - searching a very large pattern tree goes on for too long, as it is in - the same circumstance when JIT is not used, but the details of exactly + The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if + searching a very large pattern tree goes on for too long, as it is in + the same circumstance when JIT is not used, but the details of exactly what is counted are not the same. The PCRE2_ERROR_DEPTHLIMIT error code is never returned when JIT matching is used. 
@@ -5506,25 +5813,25 @@ RETURN VALUES FROM JIT MATCHING CONTROLLING THE JIT STACK When the compiled JIT code runs, it needs a block of memory to use as a - stack. By default, it uses 32KiB on the machine stack. However, some - large or complicated patterns need more than this. The error PCRE2_ER- + stack. By default, it uses 32KiB on the machine stack. However, some + large or complicated patterns need more than this. The error PCRE2_ER- ROR_JIT_STACKLIMIT is given when there is not enough stack. Three func- tions are provided for managing blocks of memory for use as JIT stacks. - There is further discussion about the use of JIT stacks in the section + There is further discussion about the use of JIT stacks in the section entitled "JIT stack FAQ" below. - The pcre2_jit_stack_create() function creates a JIT stack. Its argu- - ments are a starting size, a maximum size, and a general context (for - memory allocation functions, or NULL for standard memory allocation). + The pcre2_jit_stack_create() function creates a JIT stack. Its argu- + ments are a starting size, a maximum size, and a general context (for + memory allocation functions, or NULL for standard memory allocation). It returns a pointer to an opaque structure of type pcre2_jit_stack, or - NULL if there is an error. The pcre2_jit_stack_free() function is used + NULL if there is an error. The pcre2_jit_stack_free() function is used to free a stack that is no longer needed. If its argument is NULL, this - function returns immediately, without doing anything. (For the techni- - cally minded: the address space is allocated by mmap or VirtualAlloc.) - A maximum stack size of 512KiB to 1MiB should be more than enough for + function returns immediately, without doing anything. (For the techni- + cally minded: the address space is allocated by mmap or VirtualAlloc.) + A maximum stack size of 512KiB to 1MiB should be more than enough for any pattern. 
- The pcre2_jit_stack_assign() function specifies which stack JIT code + The pcre2_jit_stack_assign() function specifies which stack JIT code should use. Its arguments are as follows: pcre2_match_context *mcontext @@ -5534,7 +5841,7 @@ CONTROLLING THE JIT STACK The first argument is a pointer to a match context. When this is subse- quently passed to a matching function, its information determines which JIT stack is used. If this argument is NULL, the function returns imme- - diately, without doing anything. There are three cases for the values + diately, without doing anything. There are three cases for the values of the other two options: (1) If callback is NULL and data is NULL, an internal 32KiB block @@ -5552,34 +5859,34 @@ CONTROLLING THE JIT STACK return value must be a valid JIT stack, the result of calling pcre2_jit_stack_create(). - A callback function is obeyed whenever JIT code is about to be run; it + A callback function is obeyed whenever JIT code is about to be run; it is not obeyed when pcre2_match() is called with options that are incom- - patible for JIT matching. A callback function can therefore be used to - determine whether a match operation was executed by JIT or by the in- + patible for JIT matching. A callback function can therefore be used to + determine whether a match operation was executed by JIT or by the in- terpreter. You may safely use the same JIT stack for more than one pattern (either - by assigning directly or by callback), as long as the patterns are + by assigning directly or by callback), as long as the patterns are matched sequentially in the same thread. 
Currently, the only way to set - up non-sequential matches in one thread is to use callouts: if a call- - out function starts another match, that match must use a different JIT + up non-sequential matches in one thread is to use callouts: if a call- + out function starts another match, that match must use a different JIT stack to the one used for currently suspended match(es). - In a multithread application, if you do not specify a JIT stack, or if - you assign or pass back NULL from a callback, that is thread-safe, be- - cause each thread has its own machine stack. However, if you assign or + In a multithread application, if you do not specify a JIT stack, or if + you assign or pass back NULL from a callback, that is thread-safe, be- + cause each thread has its own machine stack. However, if you assign or pass back a non-NULL JIT stack, this must be a different stack for each thread so that the application is thread-safe. - Strictly speaking, even more is allowed. You can assign the same non- - NULL stack to a match context that is used by any number of patterns, - as long as they are not used for matching by multiple threads at the - same time. For example, you could use the same stack in all compiled - patterns, with a global mutex in the callback to wait until the stack + Strictly speaking, even more is allowed. You can assign the same non- + NULL stack to a match context that is used by any number of patterns, + as long as they are not used for matching by multiple threads at the + same time. For example, you could use the same stack in all compiled + patterns, with a global mutex in the callback to wait until the stack is available for use. However, this is an inefficient solution, and not recommended. 
- This is a suggestion for how a multithreaded program that needs to set + This is a suggestion for how a multithreaded program that needs to set up non-default JIT stacks might operate: During thread initialization @@ -5591,7 +5898,7 @@ CONTROLLING THE JIT STACK Use a one-line callback function return thread_local_var - All the functions described in this section do nothing if JIT is not + All the functions described in this section do nothing if JIT is not available. @@ -5600,20 +5907,20 @@ JIT STACK FAQ (1) Why do we need JIT stacks? PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack - where the local data of the current node is pushed before checking its + where the local data of the current node is pushed before checking its child nodes. Allocating real machine stack on some platforms is diffi- cult. For example, the stack chain needs to be updated every time if we - extend the stack on PowerPC. Although it is possible, its updating + extend the stack on PowerPC. Although it is possible, its updating time overhead decreases performance. So we do the recursion in memory. (2) Why don't we simply allocate blocks of memory with malloc()? - Modern operating systems have a nice feature: they can reserve an ad- + Modern operating systems have a nice feature: they can reserve an ad- dress space instead of allocating memory. We can safely allocate memory pages inside this address space, so the stack could grow without moving - memory data (this is important because of pointers). Thus we can allo- - cate 1MiB address space, and use only a single memory page (usually - 4KiB) if that is enough. However, we can still grow up to 1MiB anytime + memory data (this is important because of pointers). Thus we can allo- + cate 1MiB address space, and use only a single memory page (usually + 4KiB) if that is enough. However, we can still grow up to 1MiB anytime if needed. (3) Who "owns" a JIT stack? 
@@ -5621,8 +5928,8 @@ JIT STACK FAQ The owner of the stack is the user program, not the JIT studied pattern or anything else. The user program must ensure that if a stack is being used by pcre2_match(), (that is, it is assigned to a match context that - is passed to the pattern currently running), that stack must not be - used by any other threads (to avoid overwriting the same memory area). + is passed to the pattern currently running), that stack must not be + used by any other threads (to avoid overwriting the same memory area). The best practice for multithreaded programs is to allocate a stack for each thread, and return this stack through the JIT callback function. @@ -5630,36 +5937,36 @@ JIT STACK FAQ You can free a JIT stack at any time, as long as it will not be used by pcre2_match() again. When you assign the stack to a match context, only - a pointer is set. There is no reference counting or any other magic. + a pointer is set. There is no reference counting or any other magic. You can free compiled patterns, contexts, and stacks in any order, any- - time. Just do not call pcre2_match() with a match context pointing to + time. Just do not call pcre2_match() with a match context pointing to an already freed stack, as that will cause SEGFAULT. (Also, do not free - a stack currently used by pcre2_match() in another thread). You can - also replace the stack in a context at any time when it is not in use. + a stack currently used by pcre2_match() in another thread). You can + also replace the stack in a context at any time when it is not in use. You should free the previous stack before assigning a replacement. - (5) Should I allocate/free a stack every time before/after calling + (5) Should I allocate/free a stack every time before/after calling pcre2_match()? - No, because this is too costly in terms of resources. However, you - could implement some clever idea which release the stack if it is not - used in let's say two minutes. 
The JIT callback can help to achieve + No, because this is too costly in terms of resources. However, you + could implement some clever idea which release the stack if it is not + used in let's say two minutes. The JIT callback can help to achieve this without keeping a list of patterns. - (6) OK, the stack is for long term memory allocation. But what happens - if a pattern causes stack overflow with a stack of 1MiB? Is that 1MiB + (6) OK, the stack is for long term memory allocation. But what happens + if a pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the stack is freed? Especially on embedded systems, it might be a good idea to release mem- - ory sometimes without freeing the stack. There is no API for this at - the moment. Probably a function call which returns with the currently - allocated memory for any stack and another which allows releasing mem- + ory sometimes without freeing the stack. There is no API for this at + the moment. Probably a function call which returns with the currently + allocated memory for any stack and another which allows releasing mem- ory (shrinking the stack) would be a good idea if someone needs this. (7) This is too much of a headache. Isn't there any better solution for JIT stack handling? - No, thanks to Windows. If POSIX threads were used everywhere, we could + No, thanks to Windows. If POSIX threads were used everywhere, we could throw out this complicated API. @@ -5668,18 +5975,18 @@ FREEING JIT SPECULATIVE MEMORY void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); The JIT executable allocator does not free all memory when it is possi- - ble. It expects new allocations, and keeps some free memory around to - improve allocation speed. However, in low memory conditions, it might - be better to free all possible memory. You can cause this to happen by - calling pcre2_jit_free_unused_memory(). Its argument is a general con- + ble. 
It expects new allocations, and keeps some free memory around to + improve allocation speed. However, in low memory conditions, it might + be better to free all possible memory. You can cause this to happen by + calling pcre2_jit_free_unused_memory(). Its argument is a general con- text, for custom memory management, or NULL for standard memory manage- ment. EXAMPLE CODE - This is a single-threaded example that specifies a JIT stack without - using a callback. A real program should include error checking after + This is a single-threaded example that specifies a JIT stack without + using a callback. A real program should include error checking after all the function calls. int rc; @@ -5707,36 +6014,36 @@ EXAMPLE CODE JIT FAST PATH API Because the API described above falls back to interpreted matching when - JIT is not available, it is convenient for programs that are written + JIT is not available, it is convenient for programs that are written for general use in many environments. However, calling JIT via pcre2_match() does have a performance impact. Programs that are written - for use where JIT is known to be available, and which need the best - possible performance, can instead use a "fast path" API to call JIT - matching directly instead of calling pcre2_match() (obviously only for + for use where JIT is known to be available, and which need the best + possible performance, can instead use a "fast path" API to call JIT + matching directly instead of calling pcre2_match() (obviously only for patterns that have been successfully processed by pcre2_jit_compile()). - The fast path function is called pcre2_jit_match(), and it takes ex- - actly the same arguments as pcre2_match(). However, the subject string - must be specified with a length; PCRE2_ZERO_TERMINATED is not sup- + The fast path function is called pcre2_jit_match(), and it takes ex- + actly the same arguments as pcre2_match(). 
However, the subject string + must be specified with a length; PCRE2_ZERO_TERMINATED is not sup- ported. Unsupported option bits (for example, PCRE2_ANCHORED and - PCRE2_ENDANCHORED) are ignored, as is the PCRE2_NO_JIT option. The re- - turn values are also the same as for pcre2_match(), plus PCRE2_ER- + PCRE2_ENDANCHORED) are ignored, as is the PCRE2_NO_JIT option. The re- + turn values are also the same as for pcre2_match(), plus PCRE2_ER- ROR_JIT_BADOPTION if a matching mode (partial or complete) is requested that was not compiled. - When you call pcre2_match(), as well as testing for invalid options, a + When you call pcre2_match(), as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For exam- - ple, if the subject pointer is NULL but the length is non-zero, an im- - mediate error is given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF + ple, if the subject pointer is NULL but the length is non-zero, an im- + mediate error is given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the interests of speed, these - checks do not happen on the JIT fast path. If invalid UTF data is - passed when PCRE2_MATCH_INVALID_UTF was not set for pcre2_compile(), - the result is undefined. The program may crash or loop or give wrong - results. In the absence of PCRE2_MATCH_INVALID_UTF you should call - pcre2_jit_match() in UTF mode only if you are sure the subject is + checks do not happen on the JIT fast path. If invalid UTF data is + passed when PCRE2_MATCH_INVALID_UTF was not set for pcre2_compile(), + the result is undefined. The program may crash or loop or give wrong + results. In the absence of PCRE2_MATCH_INVALID_UTF you should call + pcre2_jit_match() in UTF mode only if you are sure the subject is valid. - Bypassing the sanity checks and the pcre2_match() wrapping can give + Bypassing the sanity checks and the pcre2_match() wrapping can give speedups of more than 10%. 
@@ -5754,15 +6061,14 @@ AUTHOR REVISION - Last updated: 21 February 2024 + Last updated: 22 August 2024 Copyright (c) 1997-2024 University of Cambridge. -PCRE2 10.43 21 February 2024 PCRE2JIT(3) +PCRE2 10.45-RC1 22 August 2024 PCRE2JIT(3) ------------------------------------------------------------------------------ - PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3) @@ -5838,15 +6144,14 @@ AUTHOR REVISION - Last updated: August 2023 + Last updated: 16 August 2023 Copyright (c) 1997-2023 University of Cambridge. -PCRE2 10.43 1 August 2023 PCRE2LIMITS(3) +PCRE2 10.45-RC1 16 August 2023 PCRE2LIMITS(3) ------------------------------------------------------------------------------ - PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3) @@ -5860,7 +6165,7 @@ PCRE2 MATCHING ALGORITHMS in PCRE2 for matching a compiled regular expression against a given subject string. The "standard" algorithm is the one provided by the pcre2_match() function. This works in the same as Perl's matching func- - tion, and provide a Perl-compatible matching operation. The just-in- + tion, and provides a Perl-compatible matching operation. The just-in- time (JIT) optimization that is described in the pcre2jit documentation is compatible with this function. @@ -5872,7 +6177,7 @@ PCRE2 MATCHING ALGORITHMS When there is only one possible way in which a given subject string can match a pattern, the two algorithms give the same answer. A difference arises, however, when there are multiple possibilities. For example, if - the pattern + the anchored pattern ^<.*> @@ -5948,83 +6253,86 @@ THE ALTERNATIVE MATCHING ALGORITHM first match (which is necessarily the shortest) is found. Note that the size of vector needed to contain all the results depends - on the number of simultaneous matches, not on the number of parentheses - in the pattern. Using pcre2_match_data_create_from_pattern() to create - the match data block is therefore not advisable when doing DFA match- - ing. 
+ on the number of simultaneous matches, not on the number of capturing + parentheses in the pattern. Using pcre2_match_data_create_from_pat- + tern() to create the match data block is therefore not advisable when + doing DFA matching. - Note also that all the matches that are found start at the same point + Note also that all the matches that are found start at the same point in the subject. If the pattern cat(er(pillar)?)? - is matched against the string "the caterpillar catchment", the result - is the three strings "caterpillar", "cater", and "cat" that start at - the fifth character of the subject. The algorithm does not automati- + is matched against the string "the caterpillar catchment", the result + is the three strings "caterpillar", "cater", and "cat" that start at + the fifth character of the subject. The algorithm does not automati- cally move on to find matches that start at later positions. PCRE2's "auto-possessification" optimization usually applies to charac- - ter repeats at the end of a pattern (as well as internally). For exam- + ter repeats at the end of a pattern (as well as internally). For exam- ple, the pattern "a\d+" is compiled as if it were "a\d++" because there - is no point even considering the possibility of backtracking into the - repeated digits. For DFA matching, this means that only one possible - match is found. If you really do want multiple matches in such cases, - either use an ungreedy repeat ("a\d+?") or set the PCRE2_NO_AUTO_POS- + is no point even considering the possibility of backtracking into the + repeated digits. For DFA matching, this means that only one possible + match is found. If you really do want multiple matches in such cases, + either use an ungreedy repeat ("a\d+?") or set the PCRE2_NO_AUTO_POS- SESS option when compiling. 
- There are a number of features of PCRE2 regular expressions that are - not supported or behave differently in the alternative matching func- + There are a number of features of PCRE2 regular expressions that are + not supported or behave differently in the alternative matching func- tion. Those that are not supported cause an error if encountered. - 1. Because the algorithm finds all possible matches, the greedy or un- - greedy nature of repetition quantifiers is not relevant (though it may - affect auto-possessification, as just described). During matching, - greedy and ungreedy quantifiers are treated in exactly the same way. + 1. Because the algorithm finds all possible matches, the greedy or un- + greedy nature of repetition quantifiers is not relevant (though it may + affect auto-possessification, as just described). During matching, + greedy and ungreedy quantifiers are treated in exactly the same way. However, possessive quantifiers can make a difference when what follows - could also match what is quantified, for example in a pattern like + could also match what is quantified, for example in a pattern like this: ^a++\w! - This pattern matches "aaab!" but not "aaa!", which would be matched by - a non-possessive quantifier. Similarly, if an atomic group is present, - it is matched as if it were a standalone pattern at the current point, - and the longest match is then "locked in" for the rest of the overall + This pattern matches "aaab!" but not "aaa!", which would be matched by + a non-possessive quantifier. Similarly, if an atomic group is present, + it is matched as if it were a standalone pattern at the current point, + and the longest match is then "locked in" for the rest of the overall pattern. 2. 
When dealing with multiple paths through the tree simultaneously, it - is not straightforward to keep track of captured substrings for the - different matching possibilities, and PCRE2's implementation of this + is not straightforward to keep track of captured substrings for the + different matching possibilities, and PCRE2's implementation of this algorithm does not attempt to do this. This means that no captured sub- strings are available. - 3. Because no substrings are captured, backreferences within the pat- - tern are not supported. + 3. Because no substrings are captured, a number of related features are + not available: - 4. For the same reason, conditional expressions that use a backrefer- - ence as the condition or test for a specific group recursion are not - supported. + (a) Backreferences; - 5. Again for the same reason, script runs are not supported. + (b) Conditional expressions that use a backreference as the condition + or test for a specific group recursion; - 6. Because many paths through the tree may be active, the \K escape se- - quence, which resets the start of the match when encountered (but may + (c) Script runs; + + (d) Scan substring assertions. + + 4. Because many paths through the tree may be active, the \K escape se- + quence, which resets the start of the match when encountered (but may be on some paths and not on others), is not supported. - 7. Callouts are supported, but the value of the capture_top field is + 5. Callouts are supported, but the value of the capture_top field is always 1, and the value of the capture_last field is always 0. - 8. The \C escape sequence, which (in the standard algorithm) always - matches a single code unit, even in a UTF mode, is not supported in - these modes, because the alternative algorithm moves through the sub- - ject string one character (not code unit) at a time, for all active - paths through the tree. + 6. 
The \C escape sequence, which (in the standard algorithm) always + matches a single code unit, even in a UTF mode, is not supported in UTF + modes because the alternative algorithm moves through the subject + string one character (not code unit) at a time, for all active paths + through the tree. - 9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) + 7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not supported. (*FAIL) is supported, and behaves like a failing negative assertion. - 10. The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not sup- + 8. The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not sup- ported by pcre2_dfa_match(). @@ -6049,13 +6357,15 @@ DISADVANTAGES OF THE ALTERNATIVE ALGORITHM partly because it has to search for all possible matches, but is also because it is less susceptible to optimization. - 2. Capturing parentheses, backreferences, script runs, and matching - within invalid UTF string are not supported. + 2. Capturing parentheses and other features such as backreferences that + rely on them are not supported. - 3. Although atomic groups are supported, their use does not provide the + 3. Matching within invalid UTF strings is not supported. + + 4. Although atomic groups are supported, their use does not provide the performance advantage that it does for the standard algorithm. - 4. JIT optimization is not supported. + 5. JIT optimization is not supported. AUTHOR @@ -6067,20 +6377,19 @@ AUTHOR REVISION - Last updated: 19 January 2024 + Last updated: 30 August 2024 Copyright (c) 1997-2024 University of Cambridge. 
-PCRE2 10.43 19 January 2024 PCRE2MATCHING(3) +PCRE2 10.45-RC1 30 August 2024 PCRE2MATCHING(3) ------------------------------------------------------------------------------ - PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3) NAME - PCRE2 - Perl-compatible regular expressions + PCRE2 - Perl-compatible regular expressions (revised API) PARTIAL MATCHING IN PCRE2 @@ -6451,15 +6760,14 @@ AUTHOR REVISION - Last updated: 04 September 2019 + Last updated: 27 November 2024 Copyright (c) 1997-2019 University of Cambridge. -PCRE2 10.34 04 September 2019 PCRE2PARTIAL(3) +PCRE2 10.45-RC1 27 November 2024 PCRE2PARTIAL(3) ------------------------------------------------------------------------------ - PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3) @@ -6473,9 +6781,11 @@ PCRE2 REGULAR EXPRESSION DETAILS by PCRE2 are described in detail below. There is a quick-reference syn- tax summary in the pcre2syntax page. PCRE2 tries to match Perl syntax and semantics as closely as it can. PCRE2 also supports some alterna- - tive regular expression syntax (which does not conflict with the Perl - syntax) in order to provide some compatibility with regular expressions - in Python, .NET, and Oniguruma. + tive regular expression syntax that does not conflict with the Perl + syntax in order to provide some compatibility with regular expressions + in Python, .NET, and Oniguruma. There are in addition some options that + enable alternative syntax and semantics that are not the same as in + Perl. Perl's regular expressions are described in its own documentation, and regular expressions in general are covered in a number of books, some @@ -6494,82 +6804,98 @@ PCRE2 REGULAR EXPRESSION DETAILS tion, are discussed in the pcre2matching page. +EBCDIC CHARACTER CODES + + Most computers use ASCII or Unicode for encoding characters, and PCRE2 + assumes this by default. 
However, it can be compiled to run in an envi- + ronment that uses the EBCDIC code, which is the case for some IBM main- + frame operating systems. In the sections below, character code values + are ASCII or Unicode; in an EBCDIC environment these characters may + have different code values, and there are no code points greater than + 255. Differences in behaviour when PCRE2 is running in an EBCDIC envi- + ronment are described in the section "EBCDIC environments" below, which + you can ignore unless you really are in an EBCDIC environment. + + SPECIAL START-OF-PATTERN ITEMS - A number of options that can be passed to pcre2_compile() can also be + A number of options that can be passed to pcre2_compile() can also be set by special items at the start of a pattern. These are not Perl-com- - patible, but are provided to make these options accessible to pattern - writers who are not able to change the program that processes the pat- - tern. Any number of these items may appear, but they must all be to- - gether right at the start of the pattern string, and the letters must + patible, but are provided to make these options accessible to pattern + writers who are not able to change the program that processes the pat- + tern. Any number of these items may appear, but they must all be to- + gether right at the start of the pattern string, and the letters must be in upper case. UTF support In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either as single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 - can be specified for the 32-bit library, in which case it constrains - the character values to valid Unicode code points. To process UTF - strings, PCRE2 must be built to include Unicode support (which is the - default). 
When using UTF strings you must either call the compiling - function with one or both of the PCRE2_UTF or PCRE2_MATCH_INVALID_UTF - options, or the pattern must start with the special sequence (*UTF), - which is equivalent to setting the relevant PCRE2_UTF. How setting a + can be specified for the 32-bit library, in which case it constrains + the character values to valid Unicode code points. To process UTF + strings, PCRE2 must be built to include Unicode support (which is the + default). When using UTF strings you must either call the compiling + function with one or both of the PCRE2_UTF or PCRE2_MATCH_INVALID_UTF + options, or the pattern must start with the special sequence (*UTF), + which is equivalent to setting the relevant PCRE2_UTF. How setting a UTF mode affects pattern matching is mentioned in several places below. There is also a summary of features in the pcre2unicode page. Some applications that allow their users to supply patterns may wish to - restrict them to non-UTF data for security reasons. If the - PCRE2_NEVER_UTF option is passed to pcre2_compile(), (*UTF) is not al- + restrict them to non-UTF data for security reasons. If the + PCRE2_NEVER_UTF option is passed to pcre2_compile(), (*UTF) is not al- lowed, and its appearance in a pattern causes an error. Unicode property support - Another special sequence that may appear at the start of a pattern is - (*UCP). This has the same effect as setting the PCRE2_UCP option: it - causes sequences such as \d and \w to use Unicode properties to deter- + Another special sequence that may appear at the start of a pattern is + (*UCP). This has the same effect as setting the PCRE2_UCP option: it + causes sequences such as \d and \w to use Unicode properties to deter- mine character types, instead of recognizing only characters with codes less than 256 via a lookup table. 
It also causes upper/lower casing op- - erations to use Unicode properties for characters with code points - greater than 127, even when UTF is not set. These behaviours can be - changed within the pattern; see the section entitled "Internal Option + erations to use Unicode properties for characters with code points + greater than 127, even when UTF is not set. These behaviours can be + changed within the pattern; see the section entitled "Internal Option Setting" below. Some applications that allow their users to supply patterns may wish to - restrict them for security reasons. If the PCRE2_NEVER_UCP option is + restrict them for security reasons. If the PCRE2_NEVER_UCP option is passed to pcre2_compile(), (*UCP) is not allowed, and its appearance in a pattern causes an error. Locking out empty string matching Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same - effect as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option + effect as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option to whichever matching function is subsequently called to match the pat- - tern. These options lock out the matching of empty strings, either en- + tern. These options lock out the matching of empty strings, either en- tirely, or only at the start of the subject. Disabling auto-possessification - If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as - setting the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making - quantifiers possessive when what follows cannot match the repeated - item. For example, by default a+b is treated as a++b. For more details, - see the pcre2api documentation. + If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as + setting the PCRE2_NO_AUTO_POSSESS option, or calling pcre2_set_opti- + mize() with a PCRE2_AUTO_POSSESS_OFF directive. This stops PCRE2 from + making quantifiers possessive when what follows cannot match the re- + peated item.
For example, by default a+b is treated as a++b. For more + details, see the pcre2api documentation. Disabling start-up optimizations - If a pattern starts with (*NO_START_OPT), it has the same effect as - setting the PCRE2_NO_START_OPTIMIZE option. This disables several opti- - mizations for quickly reaching "no match" results. For more details, - see the pcre2api documentation. + If a pattern starts with (*NO_START_OPT), it has the same effect as + setting the PCRE2_NO_START_OPTIMIZE option, or calling pcre2_set_opti- + mize() with a PCRE2_START_OPTIMIZE_OFF directive. This disables several + optimizations for quickly reaching "no match" results. For more de- + tails, see the pcre2api documentation. Disabling automatic anchoring If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect - as setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimiza- - tions that apply to patterns whose top-level branches all start with .* - (match any number of arbitrary characters). For more details, see the - pcre2api documentation. + as setting the PCRE2_NO_DOTSTAR_ANCHOR option, or calling pcre2_set_op- + timize() with a PCRE2_DOTSTAR_ANCHOR_OFF directive. This disables opti- + mizations that apply to patterns whose top-level branches all start + with .* (match any number of arbitrary characters). For more details, + see the pcre2api documentation. Disabling JIT compilation @@ -6666,33 +6992,27 @@ SPECIAL START-OF-PATTERN ITEMS CODE) is also recognized, corresponding to PCRE2_BSR_UNICODE. -EBCDIC CHARACTER CODES - - PCRE2 can be compiled to run in an environment that uses EBCDIC as its - character code instead of ASCII or Unicode (typically a mainframe sys- - tem). In the sections below, character code values are ASCII or Uni- - code; in an EBCDIC environment these characters may have different code - values, and there are no code points greater than 255. 
- - CHARACTERS AND METACHARACTERS - A regular expression is a pattern that is matched against a subject - string from left to right. Most characters stand for themselves in a - pattern, and match the corresponding characters in the subject. As a + A regular expression is a pattern that is matched against a subject + string from left to right. Most characters stand for themselves in a + pattern, and match the corresponding characters in the subject. As a trivial example, the pattern The quick brown fox matches a portion of a subject string that is identical to itself. When - caseless matching is specified (the PCRE2_CASELESS option or (?i) - within the pattern), letters are matched independently of case. Note - that there are two ASCII characters, K and S, that, in addition to - their lower case ASCII equivalents, are case-equivalent with Unicode - U+212A (Kelvin sign) and U+017F (long S) respectively when either + caseless matching is specified (the PCRE2_CASELESS option or (?i) + within the pattern), letters are matched independently of case. Note + that there are two ASCII characters, K and S, that, in addition to + their lower case ASCII equivalents, are case-equivalent with Unicode + U+212A (Kelvin sign) and U+017F (long S) respectively when either PCRE2_UTF or PCRE2_UCP is set, unless the PCRE2_EXTRA_CASELESS_RESTRICT - option is in force (either passed to pcre2_compile() or set by (?r) - within the pattern). + option is in force (either passed to pcre2_compile() or set by (*CASE- + LESS_RESTRICT) or (?r) within the pattern). If the PCRE2_EXTRA_TURK- + ISH_CASING option is in force (either passed to pcre2_compile() or set + by (*TURKISH_CASING) within the pattern), then the 'i' letters are + matched according to Turkish and Azeri languages. The power of regular expressions comes from the ability to include wild cards, character classes, alternatives, and repetitions in the pattern. 
@@ -6739,7 +7059,7 @@ CHARACTERS AND METACHARACTERS If a pattern is compiled with the PCRE2_EXTENDED option, most white space in the pattern, other than in a character class, within a \Q...\E sequence, or between a # outside a character class and the next new- - line, inclusive, are ignored. An escaping backslash can be used to in- + line, inclusive, is ignored. An escaping backslash can be used to in- clude a white space or a # character as part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same applies, but in addition unescaped space and horizontal tab characters are ignored inside a @@ -6797,6 +7117,13 @@ BACKSLASH error, because the character class is then not terminated by a closing square bracket. + Another difference from Perl is that any appearance of \Q or \E inside + what might otherwise be a quantifier causes PCRE2 not to recognize the + sequence as a quantifier. Perl recognizes a quantifier if (redundantly) + either of the numbers is inside \Q...\E, but not if the separating + comma is. When not recognized as a quantifier a sequence such as + {\Q1\E,2} is treated as the literal string "{1,2}". + Non-printing characters A second use of backslash provides a way of encoding non-printing char- @@ -6815,115 +7142,107 @@ BACKSLASH \r carriage return (hex 0D) (but see below) \t tab (hex 09) \0dd character with octal code 0dd - \ddd character with octal code ddd, or backreference + \ddd character with octal code ddd, or back reference \o{ddd..} character with octal code ddd.. \xhh character with hex code hh \x{hhh..} character with hex code hhh.. \N{U+hhh..} character with Unicode hex code point hhh.. - By default, after \x that is not followed by {, from zero to two hexa- - decimal digits are read (letters can be in upper or lower case). Any - number of hexadecimal digits may appear between \x{ and }. If a charac- - ter other than a hexadecimal digit appears between \x{ and }, or if - there is no terminating }, an error occurs. 
+ A description of how back references work is given later, following the + discussion of parenthesized groups. + + By default, after \x that is not followed by {, one or two hexadecimal + digits are read (letters can be in upper or lower case). If the charac- + ter that follows \x is neither { nor a hexadecimal digit, an error oc- + curs. This is different from Perl's default behaviour, which generates + a NUL character, but is in line with the behaviour of Perl's 'strict' + mode in re. + + Any number of hexadecimal digits may appear between \x{ and }. If a + character other than a hexadecimal digit appears between \x{ and }, or + if there is no terminating }, an error occurs. Characters whose code points are less than 256 can be defined by either of the two syntaxes for \x or by an octal sequence. There is no differ- ence in the way they are handled. For example, \xdc is exactly the same - as \x{dc} or \334. However, using the braced versions does make such + as \x{dc} or \334. However, using the braced versions does make such sequences easier to read. - Support is available for some ECMAScript (aka JavaScript) escape se- + Support is available for some ECMAScript (aka JavaScript) escape se- quences via two compile-time options. If PCRE2_ALT_BSUX is set, the se- - quence \x followed by { is not recognized. Only if \x is followed by - two hexadecimal digits is it recognized as a character escape. Other- - wise it is interpreted as a literal "x" character. In this mode, sup- - port for code points greater than 256 is provided by \u, which must be - followed by four hexadecimal digits; otherwise it is interpreted as a + quence \x followed by { is not recognized. Only if \x is followed by + two hexadecimal digits is it recognized as a character escape. Other- + wise it is interpreted as a literal "x" character. 
In this mode, sup- + port for code points greater than 256 is provided by \u, which must be + followed by four hexadecimal digits; otherwise it is interpreted as a literal "u" character. - PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in ad- + PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in ad- dition, \u{hhh..} is recognized as the character specified by hexadeci- mal code point. There may be any number of hexadecimal digits, but un- - like other places that also use curly brackets, spaces are not allowed - and would result in the string being interpreted as a literal. This + like other places that also use curly brackets, spaces are not allowed + and would result in the string being interpreted as a literal. This syntax is from ECMAScript 6. - The \N{U+hhh..} escape sequence is recognized only when PCRE2 is oper- - ating in UTF mode. Perl also uses \N{name} to specify characters by - Unicode name; PCRE2 does not support this. Note that when \N is not + The \N{U+hhh..} escape sequence is recognized only when PCRE2 is oper- + ating in UTF mode. Perl also uses \N{name} to specify characters by + Unicode name; PCRE2 does not support this. Note that when \N is not followed by an opening brace (curly bracket) it has an entirely differ- ent meaning, matching any character that is not a newline. - There are some legacy applications where the escape sequence \r is ex- - pected to match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option - is set, \r in a pattern is converted to \n so that it matches a LF + There are some legacy applications where the escape sequence \r is ex- + pected to match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option + is set, \r in a pattern is converted to \n so that it matches a LF (linefeed) instead of a CR (carriage return) character. - An error occurs if \c is not followed by a character whose ASCII code - point is in the range 32 to 126. 
The precise effect of \cx is as fol- - lows: if x is a lower case letter, it is converted to upper case. Then + An error occurs if \c is not followed by a character whose ASCII code + point is in the range 32 to 126. The precise effect of \cx is as fol- + lows: if x is a lower case letter, it is converted to upper case. Then bit 6 of the character (hex 40) is inverted. Thus \cA to \cZ become hex - 01 to hex 1A (A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and - \c; becomes hex 7B (; is 3B). If the code unit following \c has a code + 01 to hex 1A (A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and + \c; becomes hex 7B (; is 3B). If the code unit following \c has a code point less than 32 or greater than 126, a compile-time error occurs. - When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. - \a, \e, \f, \n, \r, and \t generate the appropriate EBCDIC code values. - The \c escape is processed as specified for Perl in the perlebcdic doc- - ument. The only characters that are allowed after \c are A-Z, a-z, or - one of @, [, \, ], ^, _, or ?. Any other character provokes a compile- - time error. The sequence \c@ encodes character code 0; after \c the - letters (in either case) encode characters 1-26 (hex 01 to hex 1A); [, - \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and \c? be- - comes either 255 (hex FF) or 95 (hex 5F). + For differences in the way some escapes behave in EBCDIC environments, + see section "EBCDIC environments" below. - Thus, apart from \c?, these escapes generate the same character code - values as they do in an ASCII environment, though the meanings of the - values mostly differ. For example, \cG always generates code value 7, - which is BEL in ASCII but DEL in EBCDIC. + Octal escapes and back references - The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, - but because 127 is not a control character in EBCDIC, Perl makes it - generate the APC character. 
Unfortunately, there are several variants - of EBCDIC. In most of them the APC character has the value 255 (hex - FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If - certain other characters have POSIX-BC values, PCRE2 makes \c? generate - 95; otherwise it generates 255. + The escape \o must be followed by a sequence of octal digits, enclosed + in braces. An error occurs if this is not the case. This escape pro- + vides a way of specifying character code points as octal numbers + greater than 0777, and it also allows octal numbers and backreferences + to be unambiguously distinguished. - After \0 up to two further octal digits are read. If there are fewer - than two digits, just those that are present are used. Thus the se- - quence \0\x\015 specifies two binary zeros followed by a CR character - (code value 13). Make sure you supply two digits after the initial zero - if the pattern character that follows is itself an octal digit. + If braces are not used, after \0 up to two further octal digits are + read. However, if the PCRE2_EXTRA_NO_BS0 option is set, at least one + more octal digit must follow \0 (use \00 to generate a NUL character). + Make sure you supply two digits after the initial zero if the pattern + character that follows is itself an octal digit. - The escape \o must be followed by a sequence of octal digits, enclosed - in braces. An error occurs if this is not the case. This escape is a - recent addition to Perl; it provides way of specifying character code - points as octal numbers greater than 0777, and it also allows octal - numbers and backreferences to be unambiguously specified. + Inside a character class, when a backslash is followed by any octal + digit, up to three octal digits are read to generate a code point. Any + subsequent digits stand for themselves. The sequences \8 and \9 are + treated as the literal characters "8" and "9". 
+ + Outside a character class, Perl's handling of a backslash followed by a + digit other than 0 is complicated by ambiguity, and Perl has changed + over time, causing PCRE2 also to change. From PCRE2 release 10.45 there + is an option called PCRE2_EXTRA_PYTHON_OCTAL that causes PCRE2 to use + Python's unambiguous rules. The next two subsections describe the two + sets of rules. For greater clarity and unambiguity, it is best to avoid following \ by - a digit greater than zero. Instead, use \o{...} or \x{...} to specify + a digit greater than zero. Instead, use \o{...} or \x{...} to specify numerical character code points, and \g{...} to specify backreferences. - The following paragraphs describe the old, ambiguous syntax. - - The handling of a backslash followed by a digit other than 0 is compli- - cated, and Perl has changed over time, causing PCRE2 also to change. - Outside a character class, PCRE2 reads the digit and any following dig- - its as a decimal number. If the number is less than 10, begins with the - digit 8 or 9, or if there are at least that many previous capture - groups in the expression, the entire sequence is taken as a backrefer- - ence. A description of how this works is given later, following the - discussion of parenthesized groups. Otherwise, up to three octal dig- - its are read to form a character code. + Perl rules for non-class backslash 1-9 - Inside a character class, PCRE2 handles \8 and \9 as the literal char- - acters "8" and "9", and otherwise reads up to three octal digits fol- - lowing the backslash, using them to generate a data character. Any sub- - sequent digits stand for themselves. For example, outside a character - class: + All the digits that follow the backslash are read as a decimal number. + If the number is less than 10, begins with the digit 8 or 9, or if + there are at least that many previous capture groups in the expression, + the entire sequence is taken as a back reference. 
Otherwise, up to + three octal digits are read to form a character code. For example: \040 is another way of writing an ASCII space \40 is the same, provided there are fewer than 40 @@ -6939,10 +7258,21 @@ BACKSLASH the value 255 (decimal) \81 is always a backreference - Note that octal values of 100 or greater that are specified using this - syntax must not be introduced by a leading zero, because no more than + Note that octal values of 100 or greater that are specified using this + syntax must not be introduced by a leading zero, because no more than three octal digits are ever read. + Python rules for non-class backslash 1-9 + + If there are at least three octal digits after the backslash, exactly + three are read as an octal code point number, but the value must be no + greater than \377, even in modes where higher code point values are + supported. Any subsequent digits stand for themselves. If there are + fewer than three octal digits, the sequence is taken as a decimal back + reference. Thus, for example, \12 is always a back reference, indepen- + dent of how many captures there are in the pattern. An error is gener- + ated for a reference to a non-existent capturing group. + Constraints on character values Characters that are specified using octal or hexadecimal numbers are @@ -7161,7 +7491,7 @@ BACKSLASH tional escape sequences that match characters with specific properties are available. They can be used in any mode, though in 8-bit and 16-bit non-UTF modes these sequences are of course limited to testing charac- - ters whose code points are less than U+0100 and U+10000, respectively. + ters whose code points are less than U+0100 or U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all treated as being in the Un- known script and with an unassigned type.
@@ -7179,15 +7509,34 @@ BACKSLASH \P{xx} a character without the xx property \X a Unicode extended grapheme cluster - The property names represented by xx above are not case-sensitive, and - in accordance with Unicode's "loose matching" rules, spaces, hyphens, - and underscores are ignored. There is support for Unicode script names, - Unicode general category properties, "Any", which matches any character - (including newline), Bidi_Class, a number of binary (yes/no) proper- - ties, and some special PCRE2 properties (described below). Certain - other Perl properties such as "InMusicalSymbols" are not supported by - PCRE2. Note that \P{Any} does not match any characters, so always - causes a match failure. + For compatibility with Perl, negation can be specified by including a + circumflex between the opening brace and the property. For example, + \p{^Lu} is the same as \P{Lu}. + + In accordance with Unicode's "loose matching" rules, ASCII white space + characters, hyphens, and underscores are ignored in the properties rep- + resented by xx above. As well as the space character, ASCII white space + can be tab, linefeed, vertical tab, formfeed, or carriage return. + + Some properties are specified as a name only; others as a name and a + value, separated by a colon or an equals sign. The names and values + consist of ASCII letters and digits (with one Perl-specific exception, + see below). They are not case sensitive. Note, however, that the es- + capes themselves, \p and \P, are case sensitive. There are abbrevia- + tions for many names. The following examples are all equivalent: + + \p{bidiclass=al} + \p{BC=al} + \p{ Bidi_Class : AL } + \p{ Bi-di class = Al } + \P{ ^ Bi-di class = Al } + + There is support for Unicode script names, Unicode general category + properties, "Any", which matches any character (including newline), + Bidi_Class, a number of binary (yes/no) properties, and some special + PCRE2 properties (described below). 
Certain other Perl properties such + as "InMusicalSymbols" are not supported by PCRE2. Note that \P{Any} + does not match any characters, so always causes a match failure. Script properties for \p and \P @@ -7197,15 +7546,15 @@ BACKSLASH Adlam script as an example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas \p{scx:Adlam} matches, in addition, characters that have Adlam in their extensions list. The full names "script" and - "script extensions" for the property types are recognized, and a equals - sign is an alternative to the colon. If a script name is given without - a property type, for example, \p{Adlam}, it is treated as \p{scx:Ad- - lam}. Perl changed to this interpretation at release 5.26 and PCRE2 - changed at release 10.40. + "script extensions" for the property types are recognized and, as for + all property specifications, an equals sign is an alternative to the + colon. If a script name is given without a property type, for example, + \p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this inter- + pretation at release 5.26 and PCRE2 changed at release 10.40. Unassigned characters (and in non-UTF 32-bit mode, characters with code points greater than 0x10FFFF) are assigned the "Unknown" script. Others - that are not part of an identified script are lumped together as "Com- + that are not part of an identified script are lumped together as "Com- mon". The current list of recognized script names and their 4-character abbreviations can be obtained by running this command: @@ -7215,15 +7564,11 @@ BACKSLASH The general category property for \p and \P Each character has exactly one Unicode general category property, spec- - ified by a two-letter abbreviation. For compatibility with Perl, nega- - tion can be specified by including a circumflex between the opening - brace and the property name. For example, \p{^Lu} is the same as - \P{Lu}. 
- - If only one letter is specified with \p or \P, it includes all the gen- - eral category properties that start with that letter. In this case, in - the absence of negation, the curly brackets in the escape sequence are - optional; these two examples have the same effect: + ified by a two-letter abbreviation. If only one letter is specified + with \p or \P, it includes all the general category properties that + start with that letter. In this case, in the absence of negation, the + curly brackets in the escape sequence are optional; these two examples + have the same effect: \p{L} \pL @@ -7238,6 +7583,7 @@ BACKSLASH Cs Surrogate L Letter + Lc Cased letter Ll Lower case letter Lm Modifier letter Lo Other letter @@ -7274,35 +7620,36 @@ BACKSLASH Zp Paragraph separator Zs Space separator - The special property LC, which has the synonym L&, is also supported: - it matches a character that has the Lu, Ll, or Lt property, in other - words, a letter that is not classified as a modifier or "other". - - The Cs (Surrogate) property applies only to characters whose code - points are in the range U+D800 to U+DFFF. These characters are no dif- - ferent to any other character when PCRE2 is not in UTF mode (using the - 16-bit or 32-bit library). However, they are not valid in Unicode + Perl originally used the name L& for the Lc property. This is still + supported by Perl, but discouraged. PCRE2 also still supports it. This + property matches any character that has the Lu, Ll, or Lt property, in + other words, any letter that is not classified as a modifier or + "other". From release 10.45 of PCRE2 the properties Lu, Ll, and Lt are + all treated as Lc when case-independent matching is set by the + PCRE2_CASELESS option or (?i) within the pattern. The other properties + are not affected by caseless matching. + + The Cs (Surrogate) property applies only to characters whose code + points are in the range U+D800 to U+DFFF. 
These characters are no dif- + ferent to any other character when PCRE2 is not in UTF mode (using the + 16-bit or 32-bit library). However, they are not valid in Unicode strings and so cannot be tested by PCRE2 in UTF mode, unless UTF valid- - ity checking has been turned off (see the discussion of + ity checking has been turned off (see the discussion of PCRE2_NO_UTF_CHECK in the pcre2api page). - The long synonyms for property names that Perl supports (such as - \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix + The long synonyms for property names that Perl supports (such as + \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix any of these properties with "Is". No character that is in the Unicode table has the Cn (unassigned) prop- erty. Instead, this property is assumed for any code point that is not in the Unicode table. - Specifying caseless matching does not affect these escape sequences. - For example, \p{Lu} always matches only upper case letters. This is - different from the behaviour of current versions of Perl. - Binary (yes/no) properties for \p and \P - Unicode defines a number of binary properties, that is, properties - whose only values are true or false. You can obtain a list of those - that are recognized by \p and \P, along with their abbreviations, by + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by running this command: pcre2test -LP @@ -7337,63 +7684,65 @@ BACKSLASH RLI right-to-left isolate RLO right-to-left override S segment separator - WS which space + WS white space - An equals sign may be used instead of a colon. The class names are - case-insensitive; only the short names listed above are recognized. + As in all property specifications, an equals sign may be used instead + of a colon and the class names are case-insensitive. 
Only the short + names listed above are recognized; PCRE2 does not at present support + any long alternatives. Extended grapheme clusters - The \X escape matches any number of Unicode characters that form an + The \X escape matches any number of Unicode characters that form an "extended grapheme cluster", and treats the sequence as an atomic group - (see below). Unicode supports various kinds of composite character by - giving each character a grapheme breaking property, and having rules + (see below). Unicode supports various kinds of composite character by + giving each character a grapheme breaking property, and having rules that use these properties to define the boundaries of extended grapheme - clusters. The rules are defined in Unicode Standard Annex 29, "Unicode - Text Segmentation". Unicode 11.0.0 abandoned the use of some previous - properties that had been used for emojis. Instead it introduced vari- - ous emoji-specific properties. PCRE2 uses only the Extended Picto- + clusters. The rules are defined in Unicode Standard Annex 29, "Unicode + Text Segmentation". Unicode 11.0.0 abandoned the use of some previous + properties that had been used for emojis. Instead it introduced vari- + ous emoji-specific properties. PCRE2 uses only the Extended Picto- graphic property. - \X always matches at least one character. Then it decides whether to + \X always matches at least one character. Then it decides whether to add additional characters according to the following rules for ending a cluster: 1. End at the end of the subject string. - 2. Do not end between CR and LF; otherwise end after any control char- + 2. Do not end between CR and LF; otherwise end after any control char- acter. - 3. Do not break Hangul (a Korean script) syllable sequences. Hangul - characters are of five types: L, V, T, LV, and LVT. 
An L character may - be followed by an L, V, LV, or LVT character; an LV or V character may - be followed by a V or T character; an LVT or T character may be fol- + 3. Do not break Hangul (a Korean script) syllable sequences. Hangul + characters are of five types: L, V, T, LV, and LVT. An L character may + be followed by an L, V, LV, or LVT character; an LV or V character may + be followed by a V or T character; an LVT or T character may be fol- lowed only by a T character. 4. Do not end before extending characters or spacing marks or the zero- - width joiner (ZWJ) character. Characters with the "mark" property al- + width joiner (ZWJ) character. Characters with the "mark" property al- ways have the "extend" grapheme breaking property. 5. Do not end after prepend characters. - 6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width - joiner) sequences. An emoji ZWJ sequence consists of a character with - the Extended_Pictographic property, optionally followed by one or more - characters with the Extend property, followed by the ZWJ character, + 6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width + joiner) sequences. An emoji ZWJ sequence consists of a character with + the Extended_Pictographic property, optionally followed by one or more + characters with the Extend property, followed by the ZWJ character, followed by another Extended_Pictographic character. - 7. Do not break within emoji flag sequences. That is, do not break be- - tween regional indicator (RI) characters if there are an odd number of + 7. Do not break within emoji flag sequences. That is, do not break be- + tween regional indicator (RI) characters if there are an odd number of RI characters before the break point. 8. Otherwise, end the cluster. 
PCRE2's additional properties - As well as the standard Unicode properties described above, PCRE2 sup- + As well as the standard Unicode properties described above, PCRE2 sup- ports four more that make it possible to convert traditional escape se- - quences such as \w and \s to use Unicode properties. PCRE2 uses these - non-standard, non-Perl properties internally when PCRE2_UCP is set. + quences such as \w and \s to use Unicode properties. PCRE2 uses these + non-standard, non-Perl properties internally when PCRE2_UCP is set. However, they may also be used explicitly. These properties are: Xan Any alphanumeric character @@ -7401,73 +7750,74 @@ BACKSLASH Xsp Any Perl space character Xwd Any Perl "word" character - Xan matches characters that have either the L (letter) or the N (num- - ber) property. Xps matches the characters tab, linefeed, vertical tab, - form feed, or carriage return, and any other character that has the Z - (separator) property. Xsp is the same as Xps; in PCRE1 it used to ex- - clude vertical tab, for Perl compatibility, but Perl changed. Xwd - matches the same characters as Xan, plus those that match Mn (non-spac- - ing mark) or Pc (connector punctuation, which includes underscore). - - There is another non-standard property, Xuc, which matches any charac- - ter that can be represented by a Universal Character Name in C++ and - other programming languages. These are the characters $, @, ` (grave - accent), and all characters with Unicode code points greater than or - equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that - most base (ASCII) characters are excluded. (Universal Character Names - are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit. + Xan matches characters that have either the L (letter) or the N (num- + ber) property. 
Xps matches the characters tab, linefeed, vertical tab, + form feed, or carriage return, and any other character that has the Z + (separator) property (this includes the space character). Xsp is the + same as Xps; in PCRE1 it used to exclude vertical tab, for Perl compat- + ibility, but Perl changed. Xwd matches the same characters as Xan, plus + those that match Mn (non-spacing mark) or Pc (connector punctuation, + which includes underscore). + + There is another non-standard property, Xuc, which matches any charac- + ter that can be represented by a Universal Character Name in C++ and + other programming languages. These are the characters $, @, ` (grave + accent), and all characters with Unicode code points greater than or + equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that + most base (ASCII) characters are excluded. (Universal Character Names + are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit. Note that the Xuc property does not match these sequences but the char- acters that they represent.) Resetting the match start - In normal use, the escape sequence \K causes any previously matched + In normal use, the escape sequence \K causes any previously matched characters not to be included in the final matched sequence that is re- turned. For example, the pattern: foo\Kbar - matches "foobar", but reports that it has matched "bar". \K does not + matches "foobar", but reports that it has matched "bar". \K does not interact with anchoring in any way. The pattern: ^foo\Kbar - matches only when the subject begins with "foobar" (in single line - mode), though it again reports the matched string as "bar". This fea- - ture is similar to a lookbehind assertion (described below), but the + matches only when the subject begins with "foobar" (in single line + mode), though it again reports the matched string as "bar". 
This fea- + ture is similar to a lookbehind assertion (described below), but the part of the pattern that precedes \K is not constrained to match a lim- - ited number of characters, as is required for a lookbehind assertion. - The use of \K does not interfere with the setting of captured sub- + ited number of characters, as is required for a lookbehind assertion. + The use of \K does not interfere with the setting of captured sub- strings. For example, when the pattern (foo)\Kbar matches "foobar", the first substring is still set to "foo". - From version 5.32.0 Perl forbids the use of \K in lookaround asser- - tions. From release 10.38 PCRE2 also forbids this by default. However, - the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling - pcre2_compile() to re-enable the previous behaviour. When this option + From version 5.32.0 Perl forbids the use of \K in lookaround asser- + tions. From release 10.38 PCRE2 also forbids this by default. However, + the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling + pcre2_compile() to re-enable the previous behaviour. When this option is set, \K is acted upon when it occurs inside positive assertions, but - is ignored in negative assertions. Note that when a pattern such as - (?=ab\K) matches, the reported start of the match can be greater than - the end of the match. Using \K in a lookbehind assertion at the start - of a pattern can also lead to odd effects. For example, consider this + is ignored in negative assertions. Note that when a pattern such as + (?=ab\K) matches, the reported start of the match can be greater than + the end of the match. Using \K in a lookbehind assertion at the start + of a pattern can also lead to odd effects. 
For example, consider this pattern: (?<=\Kfoo)bar - If the subject is "foobar", a call to pcre2_match() with a starting - offset of 3 succeeds and reports the matching string as "foobar", that - is, the start of the reported match is earlier than where the match + If the subject is "foobar", a call to pcre2_match() with a starting + offset of 3 succeeds and reports the matching string as "foobar", that + is, the start of the reported match is earlier than where the match started. Simple assertions - The final use of backslash is for certain simple assertions. An asser- - tion specifies a condition that has to be met at a particular point in - a match, without consuming any characters from the subject string. The - use of groups for more complicated assertions is described below. The + The final use of backslash is for certain simple assertions. An asser- + tion specifies a condition that has to be met at a particular point in + a match, without consuming any characters from the subject string. The + use of groups for more complicated assertions is described below. The backslashed assertions are: \b matches at a word boundary @@ -7478,193 +7828,193 @@ BACKSLASH \z matches only at the end of the subject \G matches at the first matching position in the subject - Inside a character class, \b has a different meaning; it matches the - backspace character. If any other of these assertions appears in a + Inside a character class, \b has a different meaning; it matches the + backspace character. If any other of these assertions appears in a character class, an "invalid escape sequence" error is generated. - A word boundary is a position in the subject string where the current - character and the previous character do not both match \w or \W (i.e. - one matches \w and the other matches \W), or the start or end of the - string if the first or last character matches \w, respectively. 
When - PCRE2 is built with Unicode support, the meanings of \w and \W can be + A word boundary is a position in the subject string where the current + character and the previous character do not both match \w or \W (i.e. + one matches \w and the other matches \W), or the start or end of the + string if the first or last character matches \w, respectively. When + PCRE2 is built with Unicode support, the meanings of \w and \W can be changed by setting the PCRE2_UCP option. When this is done, it also af- - fects \b and \B. Neither PCRE2 nor Perl has a separate "start of word" - or "end of word" metasequence. However, whatever follows \b normally - determines which it is. For example, the fragment \ba matches "a" at + fects \b and \B. Neither PCRE2 nor Perl has a separate "start of word" + or "end of word" metasequence. However, whatever follows \b normally + determines which it is. For example, the fragment \ba matches "a" at the start of a word. - The \A, \Z, and \z assertions differ from the traditional circumflex + The \A, \Z, and \z assertions differ from the traditional circumflex and dollar (described in the next section) in that they only ever match - at the very start and end of the subject string, whatever options are - set. Thus, they are independent of multiline mode. These three asser- - tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options, - which affect only the behaviour of the circumflex and dollar metachar- - acters. However, if the startoffset argument of pcre2_match() is non- - zero, indicating that matching is to start at a point other than the - beginning of the subject, \A can never match. The difference between - \Z and \z is that \Z matches before a newline at the end of the string + at the very start and end of the subject string, whatever options are + set. Thus, they are independent of multiline mode. 
These three asser- + tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options, + which affect only the behaviour of the circumflex and dollar metachar- + acters. However, if the startoffset argument of pcre2_match() is non- + zero, indicating that matching is to start at a point other than the + beginning of the subject, \A can never match. The difference between + \Z and \z is that \Z matches before a newline at the end of the string as well as at the very end, whereas \z matches only at the end. - The \G assertion is true only when the current matching position is at - the start point of the matching process, as specified by the startoff- - set argument of pcre2_match(). It differs from \A when the value of - startoffset is non-zero. By calling pcre2_match() multiple times with - appropriate arguments, you can mimic Perl's /g option, and it is in + The \G assertion is true only when the current matching position is at + the start point of the matching process, as specified by the startoff- + set argument of pcre2_match(). It differs from \A when the value of + startoffset is non-zero. By calling pcre2_match() multiple times with + appropriate arguments, you can mimic Perl's /g option, and it is in this kind of implementation where \G can be useful. - Note, however, that PCRE2's implementation of \G, being true at the - starting character of the matching process, is subtly different from - Perl's, which defines it as true at the end of the previous match. In - Perl, these can be different when the previously matched string was + Note, however, that PCRE2's implementation of \G, being true at the + starting character of the matching process, is subtly different from + Perl's, which defines it as true at the end of the previous match. In + Perl, these can be different when the previously matched string was empty. Because PCRE2 does just one match at a time, it cannot reproduce this behaviour. 
- If all the alternatives of a pattern begin with \G, the expression is + If all the alternatives of a pattern begin with \G, the expression is anchored to the starting match position, and the "anchored" flag is set in the compiled regular expression. CIRCUMFLEX AND DOLLAR - The circumflex and dollar metacharacters are zero-width assertions. - That is, they test for a particular condition being true without con- + The circumflex and dollar metacharacters are zero-width assertions. + That is, they test for a particular condition being true without con- suming any characters from the subject string. These two metacharacters - are concerned with matching the starts and ends of lines. If the new- - line convention is set so that only the two-character sequence CRLF is - recognized as a newline, isolated CR and LF characters are treated as + are concerned with matching the starts and ends of lines. If the new- + line convention is set so that only the two-character sequence CRLF is + recognized as a newline, isolated CR and LF characters are treated as ordinary data characters, and are not recognized as newlines. Outside a character class, in the default matching mode, the circumflex - character is an assertion that is true only if the current matching - point is at the start of the subject string. If the startoffset argu- - ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum- - flex can never match if the PCRE2_MULTILINE option is unset. Inside a - character class, circumflex has an entirely different meaning (see be- + character is an assertion that is true only if the current matching + point is at the start of the subject string. If the startoffset argu- + ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum- + flex can never match if the PCRE2_MULTILINE option is unset. Inside a + character class, circumflex has an entirely different meaning (see be- low). 
- Circumflex need not be the first character of the pattern if a number - of alternatives are involved, but it should be the first thing in each - alternative in which it appears if the pattern is ever to match that - branch. If all possible alternatives start with a circumflex, that is, - if the pattern is constrained to match only at the start of the sub- - ject, it is said to be an "anchored" pattern. (There are also other + Circumflex need not be the first character of the pattern if a number + of alternatives are involved, but it should be the first thing in each + alternative in which it appears if the pattern is ever to match that + branch. If all possible alternatives start with a circumflex, that is, + if the pattern is constrained to match only at the start of the sub- + ject, it is said to be an "anchored" pattern. (There are also other constructs that can cause a pattern to be anchored.) - The dollar character is an assertion that is true only if the current - matching point is at the end of the subject string, or immediately be- - fore a newline at the end of the string (by default), unless PCRE2_NO- - TEOL is set. Note, however, that it does not actually match the new- - line. Dollar need not be the last character of the pattern if a number - of alternatives are involved, but it should be the last item in any - branch in which it appears. Dollar has no special meaning in a charac- + The dollar character is an assertion that is true only if the current + matching point is at the end of the subject string, or immediately be- + fore a newline at the end of the string (by default), unless PCRE2_NO- + TEOL is set. Note, however, that it does not actually match the new- + line. Dollar need not be the last character of the pattern if a number + of alternatives are involved, but it should be the last item in any + branch in which it appears. Dollar has no special meaning in a charac- ter class. 
- The meaning of dollar can be changed so that it matches only at the - very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at + The meaning of dollar can be changed so that it matches only at the + very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at compile time. This does not affect the \Z assertion. The meanings of the circumflex and dollar metacharacters are changed if - the PCRE2_MULTILINE option is set. When this is the case, a dollar - character matches before any newlines in the string, as well as at the - very end, and a circumflex matches immediately after internal newlines - as well as at the start of the subject string. It does not match after - a newline that ends the string, for compatibility with Perl. However, + the PCRE2_MULTILINE option is set. When this is the case, a dollar + character matches before any newlines in the string, as well as at the + very end, and a circumflex matches immediately after internal newlines + as well as at the start of the subject string. It does not match after + a newline that ends the string, for compatibility with Perl. However, this can be changed by setting the PCRE2_ALT_CIRCUMFLEX option. - For example, the pattern /^abc$/ matches the subject string "def\nabc" - (where \n represents a newline) in multiline mode, but not otherwise. - Consequently, patterns that are anchored in single line mode because - all branches start with ^ are not anchored in multiline mode, and a - match for circumflex is possible when the startoffset argument of - pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored + For example, the pattern /^abc$/ matches the subject string "def\nabc" + (where \n represents a newline) in multiline mode, but not otherwise. 
+ Consequently, patterns that are anchored in single line mode because + all branches start with ^ are not anchored in multiline mode, and a + match for circumflex is possible when the startoffset argument of + pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is set. - When the newline convention (see "Newline conventions" below) recog- - nizes the two-character sequence CRLF as a newline, this is preferred, - even if the single characters CR and LF are also recognized as new- - lines. For example, if the newline convention is "any", a multiline - mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather - than after CR, even though CR on its own is a valid newline. (It also + When the newline convention (see "Newline conventions" below) recog- + nizes the two-character sequence CRLF as a newline, this is preferred, + even if the single characters CR and LF are also recognized as new- + lines. For example, if the newline convention is "any", a multiline + mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather + than after CR, even though CR on its own is a valid newline. (It also matches at the very start of the string, of course.) - Note that the sequences \A, \Z, and \z can be used to match the start - and end of the subject in both modes, and if all branches of a pattern - start with \A it is always anchored, whether or not PCRE2_MULTILINE is + Note that the sequences \A, \Z, and \z can be used to match the start + and end of the subject in both modes, and if all branches of a pattern + start with \A it is always anchored, whether or not PCRE2_MULTILINE is set. FULL STOP (PERIOD, DOT) AND \N Outside a character class, a dot in the pattern matches any one charac- - ter in the subject string except (by default) a character that signi- + ter in the subject string except (by default) a character that signi- fies the end of a line. 
One or more characters may be specified as line terminators (see "Newline conventions" above). - Dot never matches a single line-ending character. When the two-charac- - ter sequence CRLF is the only line ending, dot does not match CR if it - is immediately followed by LF, but otherwise it matches all characters - (including isolated CRs and LFs). When ANYCRLF is selected for line - endings, no occurrences of CR of LF match dot. When all Unicode line + Dot never matches a single line-ending character. When the two-charac- + ter sequence CRLF is the only line ending, dot does not match CR if it + is immediately followed by LF, but otherwise it matches all characters + (including isolated CRs and LFs). When ANYCRLF is selected for line + endings, no occurrences of CR or LF match dot. When all Unicode line endings are being recognized, dot does not match CR or LF or any of the other line ending characters. - The behaviour of dot with regard to newlines can be changed. If the - PCRE2_DOTALL option is set, a dot matches any one character, without - exception. If the two-character sequence CRLF is present in the sub- + The behaviour of dot with regard to newlines can be changed. If the + PCRE2_DOTALL option is set, a dot matches any one character, without + exception. If the two-character sequence CRLF is present in the sub- ject string, it takes two dots to match it. - The handling of dot is entirely independent of the handling of circum- - flex and dollar, the only relationship being that they both involve + The handling of dot is entirely independent of the handling of circum- + flex and dollar, the only relationship being that they both involve newlines. Dot has no special meaning in a character class. - The escape sequence \N when not followed by an opening brace behaves - like a dot, except that it is not affected by the PCRE2_DOTALL option.
- In other words, it matches any character except one that signifies the + The escape sequence \N when not followed by an opening brace behaves + like a dot, except that it is not affected by the PCRE2_DOTALL option. + In other words, it matches any character except one that signifies the end of a line. When \N is followed by an opening brace it has a different meaning. See - the section entitled "Non-printing characters" above for details. Perl - also uses \N{name} to specify characters by Unicode name; PCRE2 does + the section entitled "Non-printing characters" above for details. Perl + also uses \N{name} to specify characters by Unicode name; PCRE2 does not support this. MATCHING A SINGLE CODE UNIT - Outside a character class, the escape sequence \C matches any one code - unit, whether or not a UTF mode is set. In the 8-bit library, one code - unit is one byte; in the 16-bit library it is a 16-bit unit; in the - 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches - line-ending characters. The feature is provided in Perl in order to + Outside a character class, the escape sequence \C matches any one code + unit, whether or not a UTF mode is set. In the 8-bit library, one code + unit is one byte; in the 16-bit library it is a 16-bit unit; in the + 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches + line-ending characters. The feature is provided in Perl in order to match individual bytes in UTF-8 mode, but it is unclear how it can use- fully be used. - Because \C breaks up characters into individual code units, matching - one unit with \C in UTF-8 or UTF-16 mode means that the rest of the + Because \C breaks up characters into individual code units, matching + one unit with \C in UTF-8 or UTF-16 mode means that the rest of the string may start with a malformed UTF character. 
This has undefined re- sults, because PCRE2 assumes that it is matching character by character in a valid UTF string (by default it checks the subject string's valid- - ity at the start of processing unless the PCRE2_NO_UTF_CHECK or + ity at the start of processing unless the PCRE2_NO_UTF_CHECK or PCRE2_MATCH_INVALID_UTF option is used). - An application can lock out the use of \C by setting the - PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also + An application can lock out the use of \C by setting the + PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to build PCRE2 with the use of \C permanently disabled. - PCRE2 does not allow \C to appear in lookbehind assertions (described - below) in UTF-8 or UTF-16 modes, because this would make it impossible - to calculate the length of the lookbehind. Neither the alternative + PCRE2 does not allow \C to appear in lookbehind assertions (described + below) in UTF-8 or UTF-16 modes, because this would make it impossible + to calculate the length of the lookbehind. Neither the alternative matching function pcre2_dfa_match() nor the JIT optimizer support \C in these UTF modes. The former gives a match-time error; the latter fails to optimize and so the match is always run using the interpreter. - In the 32-bit library, however, \C is always supported (when not ex- - plicitly locked out) because it always matches a single code unit, + In the 32-bit library, however, \C is always supported (when not ex- + plicitly locked out) because it always matches a single code unit, whether or not UTF-32 is specified. In general, the \C escape sequence is best avoided. 
However, one way of - using it that avoids the problem of malformed UTF-8 or UTF-16 charac- - ters is to use a lookahead to check the length of the next character, - as in this pattern, which could be used with a UTF-8 string (ignore + using it that avoids the problem of malformed UTF-8 or UTF-16 charac- + ters is to use a lookahead to check the length of the next character, + as in this pattern, which could be used with a UTF-8 string (ignore white space and line breaks): (?| (?=[\x00-\x7f])(\C) | @@ -7672,11 +8022,11 @@ MATCHING A SINGLE CODE UNIT (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) | (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C)) - In this example, a group that starts with (?| resets the capturing - parentheses numbers in each alternative (see "Duplicate Group Numbers" + In this example, a group that starts with (?| resets the capturing + parentheses numbers in each alternative (see "Duplicate Group Numbers" below). The assertions at the start of each branch check the next UTF-8 - character for values whose encoding uses 1, 2, 3, or 4 bytes, respec- - tively. The character's individual bytes are then captured by the ap- + character for values whose encoding uses 1, 2, 3, or 4 bytes, respec- + tively. The character's individual bytes are then captured by the ap- propriate number of \C groups. @@ -7684,27 +8034,27 @@ SQUARE BRACKETS AND CHARACTER CLASSES An opening square bracket introduces a character class, terminated by a closing square bracket. A closing square bracket on its own is not spe- - cial by default. If a closing square bracket is required as a member + cial by default. If a closing square bracket is required as a member of the class, it should be the first data character in the class (after - an initial circumflex, if present) or escaped with a backslash. This - means that, by default, an empty class cannot be defined. 
However, if - the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at + an initial circumflex, if present) or escaped with a backslash. This + means that, by default, an empty class cannot be defined. However, if + the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at the start does end the (empty) class. - A character class matches a single character in the subject. A matched + A character class matches a single character in the subject. A matched character must be in the set of characters defined by the class, unless - the first character in the class definition is a circumflex, in which + the first character in the class definition is a circumflex, in which case the subject character must not be in the set defined by the class. - If a circumflex is actually required as a member of the class, ensure + If a circumflex is actually required as a member of the class, ensure it is not the first character, or escape it with a backslash. - For example, the character class [aeiou] matches any lower case vowel, - while [^aeiou] matches any character that is not a lower case vowel. - Note that a circumflex is just a convenient notation for specifying the - characters that are in the class by enumerating those that are not. A - class that starts with a circumflex is not an assertion; it still con- - sumes a character from the subject string, and therefore it fails if - the current pointer is at the end of the string. + For example, the character class [aeiou] matches any lower case English + vowel, whereas [^aeiou] matches all other characters. Note that a cir- + cumflex is just a convenient notation for specifying the characters + that are in the class by enumerating those that are not. A class that + starts with a circumflex is not an assertion; it still consumes a char- + acter from the subject string, and therefore it fails to match if the + current pointer is at the end of the string. 
Characters in a class may be specified by their code points using \o, \x, or \N{U+hh..} in the usual way. When caseless matching is set, any @@ -7714,7 +8064,10 @@ SQUARE BRACKETS AND CHARACTER CLASSES would. Note that there are two ASCII characters, K and S, that, in ad- dition to their lower case ASCII equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) respectively when ei- - ther PCRE2_UTF or PCRE2_UCP is set. + ther PCRE2_UTF or PCRE2_UCP is set. If you do not want these ASCII/non- + ASCII case equivalences, you can suppress them by setting PCRE2_EX- + TRA_CASELESS_RESTRICT, either as an option in a compile context, or by + including (*CASELESS_RESTRICT) or (?r) within a pattern. Characters that might indicate line breaks are never treated in any special way when matching character classes, whatever line-ending se- @@ -7743,67 +8096,171 @@ SQUARE BRACKETS AND CHARACTER CLASSES last character in the class, or immediately after a range. For example, [b-d-z] matches letters in the range b to d, a hyphen character, or z. + There is some special treatment for alphabetic ranges in EBCDIC envi- + ronments; see the section "EBCDIC environments" below. + Perl treats a hyphen as a literal if it appears before or after a POSIX class (see below) or before or after a character type escape such as \d - or \H. However, unless the hyphen is the last character in the class, - Perl outputs a warning in its warning mode, as this is most likely a - user error. As PCRE2 has no facility for warning, an error is given in + or \H. However, unless the hyphen is the last character in the class, + Perl outputs a warning in its warning mode, as this is most likely a + user error. As PCRE2 has no facility for warning, an error is given in these cases. It is not possible to have the literal character "]" as the end charac- - ter of a range. 
A pattern such as [W-]46] is interpreted as a class of - two characters ("W" and "-") followed by a literal string "46]", so it - would match "W46]" or "-46]". However, if the "]" is escaped with a - backslash it is interpreted as the end of range, so [W-\]46] is inter- - preted as a class containing a range followed by two other characters. - The octal or hexadecimal representation of "]" can also be used to end - a range. + ter of a range. A pattern such as [W-]46] is interpreted as a class of + two characters ("W" and "-") followed by a literal string "46]", so it + would match "W46]" or "-46]". However, if the "]" is escaped with a + backslash it is interpreted as the end of a range, so [W-\]46] is in- + terpreted as a class containing a range and two other characters. The + octal or hexadecimal representation of "]" can also be used to end a + range. Ranges normally include all code points between the start and end char- - acters, inclusive. They can also be used for code points specified nu- - merically, for example [\000-\037]. Ranges can include any characters - that are valid for the current mode. In any UTF mode, the so-called - "surrogate" characters (those whose code points lie between 0xd800 and - 0xdfff inclusive) may not be specified explicitly by default (the - PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How- + acters, inclusive. They can also be used for code points specified nu- + merically, for example [\000-\037]. Ranges can include any characters + that are valid for the current mode. In any UTF mode, the so-called + "surrogate" characters (those whose code points lie between 0xd800 and + 0xdfff inclusive) may not be specified explicitly by default (the + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How- ever, ranges such as [\x{d7ff}-\x{e000}], which include the surrogates, are always permitted. 
- There is a special case in EBCDIC environments for ranges whose end - points are both specified as literal letters in the same case. For com- - patibility with Perl, EBCDIC code points within the range that are not - letters are omitted. For example, [h-k] matches only four characters, - even though the codes for h and k are 0x88 and 0x92, a range of 11 code - points. However, if the range is specified numerically, for example, - [\x88-\x92] or [h-\x92], all code points are included. - If a range that includes letters is used when caseless matching is set, it matches the letters in either case. For example, [W-c] is equivalent - to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if - character tables for a French locale are in use, [\xc8-\xcb] matches + to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if + character tables for a French locale are in use, [\xc8-\xcb] matches accented E characters in both cases. - A circumflex can conveniently be used with the upper case character - types to specify a more restricted set of characters than the matching - lower case type. For example, the class [^\W_] matches any letter or + A circumflex can conveniently be used with the upper case character + types to specify a more restricted set of characters than the matching + lower case type. For example, the class [^\W_] matches any letter or digit, but not underscore, whereas [\w] includes underscore. A positive character class should be read as "something OR something OR ..." and a negative class as "NOT something AND NOT something AND NOT ...". - The only metacharacters that are recognized in character classes are - backslash, hyphen (only where it can be interpreted as specifying a - range), circumflex (only at the start), opening square bracket (only - when it can be interpreted as introducing a POSIX class name, or for a - special compatibility feature - see the next two sections), and the - terminating closing square bracket. 
However, escaping other non-al- - phanumeric characters does no harm. + The metacharacters that are recognized in character classes are back- + slash, hyphen (when it can be interpreted as specifying a range), cir- + cumflex (only at the start), and the terminating closing square + bracket. An opening square bracket is also special when it can be in- + terpreted as introducing a POSIX class (see "Posix character classes" + below), or a special compatibility feature (see "Compatibility feature + for word boundaries" below. Escaping any non-alphanumeric character in + a class turns it into a literal, whether or not it would otherwise be a + metacharacter. + + +PERL EXTENDED CHARACTER CLASSES + + From release 10.45 PCRE2 supports Perl's (?[...]) extended character + class syntax. This can be used to perform set operations such as inter- + section on character classes. + + The syntax permitted within (?[...]) is quite different to ordinary + character classes. Inside the extended class, there is an expression + syntax consisting of "atoms", operators, and ordinary parentheses "()" + used for grouping. Such classes always have the Perl /xx modifier + (PCRE2 option PCRE2_EXTENDED_MORE) turned on within them. This means + that literal space and tab characters are ignored everywhere in the + class. + + The allowed atoms are individual characters specified by escape se- + quences such as \n or \x{123}, character types such as \d, POSIX + classes such as [:alpha:], and nested ordinary (non-extended) character + classes. For example, in (?[\d & [...]]) the nested class [...] follows + the usual rules for ordinary character classes, in which parentheses + are not metacharacters, and character literals and ranges are permit- + ted. + + Character literals and ranges may not appear outside a nested ordinary + character class because they are not atoms in the extended syntax. 
The + extended syntax does not introduce any additional escape sequences, so + (?[\y]) is an unknown escape, as it would be in [\y]. + + In the extended syntax, ^ does not negate a class (except within an or- + dinary class nested inside an extended class); it is instead a binary + operator. + + The binary operators are "&" (intersection), "|" or "+" (union), "-" + (subtraction) and "^" (symmetric difference). These are left-associa- + tive and "&" has higher (tighter) precedence, while the others have + equal lower precedence. The one prefix unary operator is "!" (comple- + ment), with highest precedence. + + +UTS#18 EXTENDED CHARACTER CLASSES + + The PCRE2_ALT_EXTENDED_CLASS option enables an alternative to Perl's + (?[...]) syntax, allowing instead extended class behaviour inside or- + dinary [...] character classes. This altered syntax for [...] classes + is loosely described by the Unicode standard UTS#18. The PCRE2_ALT_EX- + TENDED_CLASS option does not prevent use of (?[...]) classes; it just + changes the meaning of all [...] classes that are not nested inside a + Perl (?[...]) class. + + Firstly, in ordinary Perl [...] syntax, an expression such as "[a[]" is + a character class with two literal characters "a" and "[", but in + UTS#18 extended classes the "[" character becomes an additional + metacharacter within classes, denoting the start of a nested class, so + a literal "[" must be escaped as "\[". + + Secondly, within the UTS#18 extended syntax, there are operators "||", + "&&", "--" and "~~" which denote character class union, intersection, + subtraction, and symmetric difference respectively. In standard Perl + syntax, these would simply be needlessly-repeated literals (except for + "--" which could be the start or end of a range). In UTS#18 extended + classes these operators can be used in constructs such as [\p{L}--[QW]] + for "Unicode letters, other than Q and W". 
A literal "-" at the start + or end of a range must be escaped, so while "[--1]" in Perl syntax is + the range from hyphen to "1", it must be escaped as "[\--1]" in UTS#18 + extended classes. + + Unlike Perl's (?[...]) extended classes, the PCRE2_EXTENDED_MORE option + to ignore space and tab characters is not automatically enabled for + UTS#18 extended classes, but it is honoured if set. + + Extended UTS#18 classes can be nested, and nested classes are them- + selves extended classes (unlike Perl, where nested classes must be sim- + ple classes). For example, [\p{L}&&[\p{Thai}||\p{Greek}]] matches any + letter that is in the Thai or Greek scripts. Note that this means that + no special grouping characters (such as the parentheses used in Perl's + (?[...]) class syntax) are needed. + + Individual class items (literal characters, literal ranges, properties + such as \d or \p{...}, and nested classes) can be combined by juxtapo- + sition or by an operator. Juxtaposition is the implicit union operator, + and binds more tightly than any explicit operator. Thus a sequence of + literals and/or ranges behaves as if it is enclosed in square brackets. + For example, [A-Z0-9&&[^E8]] is the same as [[A-Z0-9]&&[^E8]], which + matches any upper case alphanumeric character except "E" or "8". + + Precedence between the explicit operators is not defined, so mixing op- + erators is a syntax error. For example, [A&&B--C] is an error, but + [A&&[B--C]] is valid. + + This is an emerging syntax which is being adopted gradually across the + regex ecosystem: for example JavaScript adopted the "/v" flag in EC- + MAScript 2024; Python's "re" module reserves the syntax for future use + with a FutureWarning for unescaped use of "[" as a literal within char- + acter classes. Due to UTS#18 providing insufficient guidance, engines + interpret the syntax differently. 
Rust's "regex" crate and Python's + "regex" PyPI module both implement UTS#18 extended classes, but with + slight incompatibilities ([A||B&&C] is parsed as [A||[B&&C]] in + Python's "regex" but as [[A||B]&&C] in Rust's "regex"). + + PCRE2's syntax adds syntax restrictions similar to ECMAScript's /v + flag,
- When a capture group is repeated, the value captured is the substring + When a capture group is repeated, the value captured is the substring that matched the final iteration. For example, after (tweedle[dume]{3}\s*)+ has matched "tweedledum tweedledee" the value of the captured substring - is "tweedledee". However, if there are nested capture groups, the cor- - responding captured values may have been set in previous iterations. + is "tweedledee". However, if there are nested capture groups, the cor- + responding captured values may have been set in previous iterations. For example, after (a|(b))+ @@ -8411,57 +8869,57 @@ REPETITION ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS - With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") - repetition, failure of what follows normally causes the repeated item - to be re-evaluated to see if a different number of repeats allows the - rest of the pattern to match. Sometimes it is useful to prevent this, - either to change the nature of the match, or to cause it fail earlier - than it otherwise might, when the author of the pattern knows there is + With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") + repetition, failure of what follows normally causes the repeated item + to be re-evaluated to see if a different number of repeats allows the + rest of the pattern to match. Sometimes it is useful to prevent this, + either to change the nature of the match, or to cause it fail earlier + than it otherwise might, when the author of the pattern knows there is no point in carrying on. - Consider, for example, the pattern \d+foo when applied to the subject + Consider, for example, the pattern \d+foo when applied to the subject line 123456bar After matching all 6 digits and then failing to match "foo", the normal - action of the matcher is to try again with only 5 digits matching the - \d+ item, and then with 4, and so on, before ultimately failing. 
- "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides + action of the matcher is to try again with only 5 digits matching the + \d+ item, and then with 4, and so on, before ultimately failing. + "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides the means for specifying that once a group has matched, it is not to be re-evaluated in this way. - If we use atomic grouping for the previous example, the matcher gives - up immediately on failing to match "foo" the first time. The notation + If we use atomic grouping for the previous example, the matcher gives + up immediately on failing to match "foo" the first time. The notation is a kind of special parenthesis, starting with (?> as in this example: (?>\d+)foo - Perl 5.28 introduced an experimental alphabetic form starting with (* + Perl 5.28 introduced an experimental alphabetic form starting with (* which may be easier to remember: (*atomic:\d+)foo - This kind of parenthesized group "locks up" the part of the pattern it + This kind of parenthesized group "locks up" the part of the pattern it contains once it has matched, and a failure further into the pattern is - prevented from backtracking into it. Backtracking past it to previous + prevented from backtracking into it. Backtracking past it to previous items, however, works as normal. An alternative description is that a group of this type matches exactly - the string of characters that an identical standalone pattern would + the string of characters that an identical standalone pattern would match, if anchored at the current point in the subject string. - Atomic groups are not capture groups. Simple cases such as the above - example can be thought of as a maximizing repeat that must swallow - everything it can. So, while both \d+ and \d+? are prepared to adjust - the number of digits they match in order to make the rest of the pat- + Atomic groups are not capture groups. 
Simple cases such as the above + example can be thought of as a maximizing repeat that must swallow + everything it can. So, while both \d+ and \d+? are prepared to adjust + the number of digits they match in order to make the rest of the pat- tern match, (?>\d+) can only match an entire sequence of digits. - Atomic groups in general can of course contain arbitrarily complicated + Atomic groups in general can of course contain arbitrarily complicated expressions, and can be nested. However, when the contents of an atomic - group is just a single repeated item, as in the example above, a sim- - pler notation, called a "possessive quantifier" can be used. This con- - sists of an additional + character following a quantifier. Using this + group is just a single repeated item, as in the example above, a sim- + pler notation, called a "possessive quantifier" can be used. This con- + sists of an additional + character following a quantifier. Using this notation, the previous example can be rewritten as \d++foo @@ -8471,24 +8929,26 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS (abc|xyz){2,3}+ - Possessive quantifiers are always greedy; the setting of the PCRE2_UN- - GREEDY option is ignored. They are a convenient notation for the sim- - pler forms of atomic group. However, there is no difference in the - meaning of a possessive quantifier and the equivalent atomic group, - though there may be a performance difference; possessive quantifiers + Possessive quantifiers are always greedy; the setting of the PCRE2_UN- + GREEDY option is ignored. They are a convenient notation for the sim- + pler forms of atomic group. However, there is no difference in the + meaning of a possessive quantifier and the equivalent atomic group, + though there may be a performance difference; possessive quantifiers should be slightly faster. - The possessive quantifier syntax is an extension to the Perl 5.8 syn- - tax. 
Jeffrey Friedl originated the idea (and the name) in the first + The possessive quantifier syntax is an extension to the Perl 5.8 syn- + tax. Jeffrey Friedl originated the idea (and the name) in the first edition of his book. Mike McCloskey liked it, so implemented it when he - built Sun's Java package, and PCRE1 copied it from there. It found its + built Sun's Java package, and PCRE1 copied it from there. It found its way into Perl at release 5.10. - PCRE2 has an optimization that automatically "possessifies" certain - simple pattern constructs. For example, the sequence A+B is treated as - A++B because there is no point in backtracking into a sequence of A's - when B must follow. This feature can be disabled by the PCRE2_NO_AUTO- - POSSESS option, or starting the pattern with (*NO_AUTO_POSSESS). + PCRE2 has an optimization that automatically "possessifies" certain + simple pattern constructs. For example, the sequence A+B is treated as + A++B because there is no point in backtracking into a sequence of A's + when B must follow. This feature can be disabled by the + PCRE2_NO_AUTO_POSSESS option, by calling pcre2_set_optimize() with a + PCRE2_AUTO_POSSESS_OFF directive, or by starting the pattern with + (*NO_AUTO_POSSESS). When a pattern contains an unlimited repeat inside a group that can it- self be repeated an unlimited number of times, the use of an atomic @@ -8649,19 +9109,25 @@ BACKREFERENCES ASSERTIONS - An assertion is a test on the characters following or preceding the - current matching point that does not consume any characters. The simple - assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described - above. + An assertion is a test that does not consume any characters. The test + must succeed for the match to continue. The simple assertions coded as + \b, \B, \A, \G, \Z, \z, ^ and $ are described above. + + More complicated assertions are coded as parenthesized groups. 
If + matching such a group succeeds, matching continues after it, but with + the matching position in the subject string reset to what it was before + the assertion was processed. + + A special kind of assertion, called a "scan substring" assertion, + matches a subpattern against a previously captured substring. This is + described in the section entitled "Scan substring assertions" below. It + is a PCRE2 extension, not compatible with Perl. - More complicated assertions are coded as parenthesized groups. There - are two kinds: those that look ahead of the current position in the - subject string, and those that look behind it, and in each case an as- - sertion may be positive (must match for the assertion to be true) or - negative (must not match for the assertion to be true). An assertion - group is matched in the normal way, and if it is true, matching contin- - ues after it, but with the matching position in the subject string re- - set to what it was before the assertion was processed. + The other goup-based assertions are of two kinds: those that look ahead + of the current position in the subject string, and those that look be- + hind it, and in each case an assertion may be positive (must match for + the assertion to be true) or negative (must not match for the assertion + to be true). The Perl-compatible lookaround assertions are atomic. If an assertion is true, but there is a subsequent matching failure, there is no back- @@ -8928,6 +9394,66 @@ NON-ATOMIC ASSERTIONS groups (see below) must be atomic. +SCAN SUBSTRING ASSERTIONS + + A special kind of assertion, not compatible with Perl, makes it possi- + ble to check the contents of a captured substring by matching it with a + subpattern. Because this involves capturing, this feature is not sup- + ported by pcre2_dfa_match(). 
+ + A scan substring assertion starts with the sequence (*scan_substring: + or (*scs: which is followed by a list of substring numbers (absolute or + relative) and/or substring names enclosed in single quotes or angle + brackets, all within parentheses. The rest of the item is the subpat- + tern that is applied to the substring, as shown in these examples: + + (*scan_substring:(1)...) + (*scs:(-2)...) + (*scs:('AB')...) + (*scs:(1,'AB',-2)...) + + The list of groups is checked in the order they are given, and it is + the contents of the first one that is found to be set that are scanned. + When PCRE2_DUPNAMES is set and there are ambiguous group names, all + groups with the same name are checked in numerical order. A scan sub- + string assertion fails if none of the groups it references have been + set. + + The pattern match on the substring is always anchored, that is, it must + match from the start of the substring. There is no "bumpalong" if it + does not match at the start. The end of the subject is temporarily re- + set to be the end of the substring, so \Z, \z, and $ will match there. + However, the start of the subject is not reset. This means that ^ + matches only if the substring is actually at the start of the main sub- + ject, but it also means that lookbehind assertions into what precedes + the substring are possible. + + Here is a very simple example: find a word that contains the rare (in + English) sequence of letters "rh" not at the start: + + \b(\w++)(*scs:(1).+rh) + + The first group captures a word which is then scanned by the second + group. This example does not actually need this heavyweight feature; + the same match can be achieved with: + + \b\w+?rh\w*\b + + When things are more complicated, however, scanning a captured sub- + string can be a useful way to describe the required match. 
For example,
There are two kinds of callout: @@ -9555,10 +10083,10 @@ BACKTRACKING CONTROL Since these verbs are specifically related to backtracking, most of them can be used only when the pattern is to be matched using the tra- - ditional matching function, because that uses a backtracking algorithm. - With the exception of (*FAIL), which behaves like a failing negative - assertion, the backtracking control verbs cause an error if encountered - by the DFA matching function. + ditional matching function or JIT, because they use backtracking algo- + rithms. With the exception of (*FAIL), which behaves like a failing + negative assertion, the backtracking control verbs cause an error if + encountered by the DFA matching function. The behaviour of these verbs in repeated groups, assertions, and in capture groups called as subroutines (whether or not recursively) is @@ -9573,11 +10101,12 @@ BACKTRACKING CONTROL running of a match, any included backtracking verbs will not, of course, be processed. You can suppress the start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com- - pile(), or by starting the pattern with (*NO_START_OPT). There is more - discussion of this option in the section entitled "Compiling a pattern" - in the pcre2api documentation. + pile(), by calling pcre2_set_optimize() with a PCRE2_START_OPTIMIZE_OFF + directive, or by starting the pattern with (*NO_START_OPT). There is + more discussion of this option in the section entitled "Compiling a + pattern" in the pcre2api documentation. - Experiments with Perl suggest that it too has similar optimizations, + Experiments with Perl suggest that it too has similar optimizations, and like PCRE2, turning them off can change the result of a match. Verbs that act immediately @@ -9586,77 +10115,77 @@ BACKTRACKING CONTROL (*ACCEPT) or (*ACCEPT:NAME) - This verb causes the match to end successfully, skipping the remainder - of the pattern. 
However, when it is inside a capture group that is + This verb causes the match to end successfully, skipping the remainder + of the pattern. However, when it is inside a capture group that is called as a subroutine, only that group is ended successfully. Matching then continues at the outer level. If (*ACCEPT) in triggered in a posi- - tive assertion, the assertion succeeds; in a negative assertion, the + tive assertion, the assertion succeeds; in a negative assertion, the assertion fails. - If (*ACCEPT) is inside capturing parentheses, the data so far is cap- + If (*ACCEPT) is inside capturing parentheses, the data so far is cap- tured. For example: A((?:A|B(*ACCEPT)|C)D) - This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap- + This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap- tured by the outer parentheses. - (*ACCEPT) is the only backtracking verb that is allowed to be quanti- - fied because an ungreedy quantification with a minimum of zero acts + (*ACCEPT) is the only backtracking verb that is allowed to be quanti- + fied because an ungreedy quantification with a minimum of zero acts only when a backtrack happens. Consider, for example, (A(*ACCEPT)??B)C - where A, B, and C may be complex expressions. After matching "A", the - matcher processes "BC"; if that fails, causing a backtrack, (*ACCEPT) - is triggered and the match succeeds. In both cases, all but C is cap- - tured. Whereas (*COMMIT) (see below) means "fail on backtrack", a re- + where A, B, and C may be complex expressions. After matching "A", the + matcher processes "BC"; if that fails, causing a backtrack, (*ACCEPT) + is triggered and the match succeeds. In both cases, all but C is cap- + tured. Whereas (*COMMIT) (see below) means "fail on backtrack", a re- peated (*ACCEPT) of this type means "succeed on backtrack". 
- Warning: (*ACCEPT) should not be used within a script run group, be- - cause it causes an immediate exit from the group, bypassing the script + Warning: (*ACCEPT) should not be used within a script run group, be- + cause it causes an immediate exit from the group, bypassing the script run checking. (*FAIL) or (*FAIL:NAME) - This verb causes a matching failure, forcing backtracking to occur. It - may be abbreviated to (*F). It is equivalent to (?!) but easier to + This verb causes a matching failure, forcing backtracking to occur. It + may be abbreviated to (*F). It is equivalent to (?!) but easier to read. The Perl documentation notes that it is probably useful only when combined with (?{}) or (??{}). Those are, of course, Perl features that - are not present in PCRE2. The nearest equivalent is the callout fea- + are not present in PCRE2. The nearest equivalent is the callout fea- ture, as for example in this pattern: a+(?C)(*FAIL) - A match with the string "aaaa" always fails, but the callout is taken + A match with the string "aaaa" always fails, but the callout is taken before each backtrack happens (in this example, 10 times). - (*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*AC- - CEPT) and (*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is + (*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*AC- + CEPT) and (*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is recorded just before the verb acts. Recording which path was taken - There is one verb whose main purpose is to track how a match was ar- - rived at, though it also has a secondary use in conjunction with ad- + There is one verb whose main purpose is to track how a match was ar- + rived at, though it also has a secondary use in conjunction with ad- vancing the match starting point (see (*SKIP) below). (*MARK:NAME) or (*:NAME) - A name is always required with this verb. For all the other backtrack- + A name is always required with this verb. 
For all the other backtrack- ing control verbs, a NAME argument is optional. - When a match succeeds, the name of the last-encountered mark name on + When a match succeeds, the name of the last-encountered mark name on the matching path is passed back to the caller as described in the sec- tion entitled "Other information about the match" in the pcre2api docu- - mentation. This applies to all instances of (*MARK) and other verbs, + mentation. This applies to all instances of (*MARK) and other verbs, including those inside assertions and atomic groups. However, there are - differences in those cases when (*MARK) is used in conjunction with + differences in those cases when (*MARK) is used in conjunction with (*SKIP) as described below. - The mark name that was last encountered on the matching path is passed - back. A verb without a NAME argument is ignored for this purpose. Here - is an example of pcre2test output, where the "mark" modifier requests + The mark name that was last encountered on the matching path is passed + back. A verb without a NAME argument is ignored for this purpose. Here + is an example of pcre2test output, where the "mark" modifier requests the retrieval and outputting of (*MARK) data: re> /X(*MARK:A)Y|X(*MARK:B)Z/mark @@ -9668,30 +10197,31 @@ BACKTRACKING CONTROL MK: B The (*MARK) name is tagged with "MK:" in this output, and in this exam- - ple it indicates which of the two alternatives matched. This is a more - efficient way of obtaining this information than putting each alterna- + ple it indicates which of the two alternatives matched. This is a more + efficient way of obtaining this information than putting each alterna- tive in its own capturing parentheses. 
- If a verb with a name is encountered in a positive assertion that is - true, the name is recorded and passed back if it is the last-encoun- + If a verb with a name is encountered in a positive assertion that is + true, the name is recorded and passed back if it is the last-encoun- tered. This does not happen for negative assertions or failing positive assertions. - After a partial match or a failed match, the last encountered name in + After a partial match or a failed match, the last encountered name in the entire match process is returned. For example: re> /X(*MARK:A)Y|X(*MARK:B)Z/mark data> XP No match, mark = B - Note that in this unanchored example the mark is retained from the + Note that in this unanchored example the mark is retained from the match attempt that started at the letter "X" in the subject. Subsequent match attempts starting at "P" and then with an empty string do not get as far as the (*MARK) item, but nevertheless do not reset it. - If you are interested in (*MARK) values after failed matches, you - should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to - ensure that the match is always attempted. + If you are interested in (*MARK) values after failed matches, you + should probably either set the PCRE2_NO_START_OPTIMIZE option or call + pcre2_set_optimize() with a PCRE2_START_OPTIMIZE_OFF directive (see + above) to ensure that the match is always attempted. Verbs that act after backtracking @@ -9699,11 +10229,11 @@ BACKTRACKING CONTROL tinues with what follows, but if there is a subsequent match failure, causing a backtrack to the verb, a failure is forced. That is, back- tracking cannot pass to the left of the verb. However, when one of - these verbs appears inside an atomic group or in a lookaround assertion - that is true, its effect is confined to that group, because once the - group has been matched, there is never any backtracking into it. 
Back- - tracking from beyond an assertion or an atomic group ignores the entire - group, and seeks a preceding backtracking point. + these verbs appears inside an atomic group or in an atomic lookaround + assertion that is true, its effect is confined to that group, because + once the group has been matched, there is never any backtracking into + it. Backtracking from beyond an atomic assertion or group ignores the + entire group, and seeks a preceding backtracking point. These verbs differ in exactly what kind of failure occurs when back- tracking reaches them. The behaviour described below is what happens @@ -9960,21 +10490,23 @@ BACKTRACKING CONTROL (*MARK) name that is set in an assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern. - PCRE2 now supports non-atomic positive assertions, as described in the - section entitled "Non-atomic assertions" above. These assertions must - be standalone (not used as conditions). They are not Perl-compatible. - For these assertions, a later backtrack does jump back into the asser- - tion, and therefore verbs such as (*COMMIT) can be triggered by back- - tracks from later in the pattern. + PCRE2 now supports non-atomic positive assertions and also "scan sub- + string" assertions, as described in the sections entitled "Non-atomic + assertions" and "Scan substring assertions" above. These assertions + must be standalone (not used as conditions). They are not Perl-compati- + ble. For these assertions, a later backtrack does jump back into the + assertion, and therefore verbs such as (*COMMIT) can be triggered by + backtracks from later in the pattern. The effect of (*THEN) is not allowed to escape beyond an assertion. If there are no more branches to try, (*THEN) causes a positive assertion - to be false, and a negative assertion to be true. + to be false, and a negative assertion to be true. This behaviour dif- + fers from Perl when the assertion has only one branch. 
- The other backtracking verbs are not treated specially if they appear - in a standalone positive assertion. In a conditional positive asser- + The other backtracking verbs are not treated specially if they appear + in a standalone positive assertion. In a conditional positive asser- tion, backtracking (from within the assertion) into (*COMMIT), (*SKIP), - or (*PRUNE) causes the condition to be false. However, for both stand- + or (*PRUNE) causes the condition to be false. However, for both stand- alone and conditional negative assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes the assertion to be true, without consider- ing any further alternative branches. @@ -9984,26 +10516,68 @@ BACKTRACKING CONTROL These behaviours occur whether or not the group is called recursively. (*ACCEPT) in a group called as a subroutine causes the subroutine match - to succeed without any further processing. Matching then continues af- - ter the subroutine call. Perl documents this behaviour. Perl's treat- + to succeed without any further processing. Matching then continues af- + ter the subroutine call. Perl documents this behaviour. Perl's treat- ment of the other verbs in subroutines is different in some cases. - (*FAIL) in a group called as a subroutine has its normal effect: it + (*FAIL) in a group called as a subroutine has its normal effect: it forces an immediate backtrack. - (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail - when triggered by being backtracked to in a group called as a subrou- + (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail + when triggered by being backtracked to in a group called as a subrou- tine. There is then a backtrack at the outer level. (*THEN), when triggered, skips to the next alternative in the innermost - enclosing group that has alternatives (its normal behaviour). However, + enclosing group that has alternatives (its normal behaviour). 
However, if there is no such group within the subroutine's group, the subroutine match fails and there is a backtrack at the outer level. +EBCDIC ENVIRONMENTS + + Differences in the way PCRE2 behaves when it is running in an EBCDIC en- + vironment are covered in this section. + + Escape sequences + + When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. + \a, \e, \f, \n, \r, and \t generate the appropriate EBCDIC code values. + The \c escape is processed as specified for Perl in the perlebcdic doc- + ument. The only characters that are allowed after \c are A-Z, a-z, or + one of @, [, \, ], ^, _, or ?. Any other character provokes a compile- + time error. The sequence \c@ encodes character code 0; after \c the + letters (in either case) encode characters 1-26 (hex 01 to hex 1A); [, + \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and \c? be- + comes either 255 (hex FF) or 95 (hex 5F). + + Thus, apart from \c?, these escapes generate the same character code + values as they do in an ASCII or Unicode environment, though the mean- + ings of the values mostly differ. For example, \cG always generates + code value 7, which is BEL in ASCII but DEL in EBCDIC. + + The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, + but because 127 is not a control character in EBCDIC, Perl makes it + generate the APC character. Unfortunately, there are several variants + of EBCDIC. In most of them the APC character has the value 255 (hex + FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If + certain other characters have POSIX-BC values, PCRE2 makes \c? generate + 95; otherwise it generates 255. + + Character classes + + In character classes there is a special case in EBCDIC environments for + ranges whose end points are both specified as literal letters in the + same case. For compatibility with Perl, EBCDIC code points within the + range that are not letters are omitted.
For example, [h-k] matches only + four characters, even though the EBCDIC codes for h and k are 0x88 and + 0x92, a range of 11 code points. However, if the range is specified nu- + merically, for example, [\x88-\x92] or [h-\x92], all code points are + included. + + SEE ALSO - pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3), + pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3), pcre2(3). @@ -10016,15 +10590,14 @@ AUTHOR REVISION - Last updated: 04 June 2024 + Last updated: 27 November 2024 Copyright (c) 1997-2024 University of Cambridge. -PCRE2 10.44 04 June 2024 PCRE2PATTERN(3) +PCRE2 10.45-RC1 27 November 2024 PCRE2PATTERN(3) ------------------------------------------------------------------------------ - PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3) @@ -10272,15 +10845,14 @@ AUTHOR REVISION - Last updated: 27 July 2022 + Last updated: 06 December 2022 Copyright (c) 1997-2022 University of Cambridge. -PCRE2 10.41 27 July 2022 PCRE2PERFORM(3) +PCRE2 10.45-RC1 06 December 2022 PCRE2PERFORM(3) ------------------------------------------------------------------------------ - PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3) @@ -10431,7 +11003,7 @@ COMPILING A PATTERN When a pattern that is compiled with this flag is passed to pcre2_regexec() for matching, the nmatch and pmatch arguments are ig- - nored, and no captured strings are returned. Versions of the PCRE li- + nored, and no captured strings are returned. Versions of the PCRE2 li- brary prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile op- tion, but this no longer happens because it disables the use of back- references. @@ -10631,15 +11203,14 @@ AUTHOR REVISION - Last updated: 19 January 2024 + Last updated: 27 November 2024 Copyright (c) 1997-2024 University of Cambridge. 
-PCRE2 10.43 19 January 2024 PCRE2POSIX(3) +PCRE2 10.45-RC1 27 November 2024 PCRE2POSIX(3) ------------------------------------------------------------------------------ - PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3) @@ -10725,13 +11296,12 @@ AUTHOR REVISION - Last updated: 02 February 2016 + Last updated: 14 November 2023 Copyright (c) 1997-2016 University of Cambridge. -PCRE2 10.22 02 February 2016 PCRE2SAMPLE(3) +PCRE2 10.45-RC1 14 November 2023 PCRE2SAMPLE(3) ------------------------------------------------------------------------------ - PCRE2SERIALIZE(3) Library Functions Manual PCRE2SERIALIZE(3) @@ -10917,15 +11487,14 @@ AUTHOR REVISION - Last updated: 27 June 2018 + Last updated: 19 January 2024 Copyright (c) 1997-2018 University of Cambridge. -PCRE2 10.32 27 June 2018 PCRE2SERIALIZE(3) +PCRE2 10.45-RC1 19 January 2024 PCRE2SERIALIZE(3) ------------------------------------------------------------------------------ - PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3) @@ -10935,9 +11504,11 @@ NAME PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY - The full syntax and semantics of the regular expressions that are sup- - ported by PCRE2 are described in the pcre2pattern documentation. This - document contains a quick-reference summary of the syntax. + The full syntax and semantics of the regular expression patterns that + are supported by PCRE2 are described in the pcre2pattern documentation. + This document contains a quick-reference summary of the pattern syntax + followed by the syntax of replacement strings in substitution function. + The full description of the latter is in the pcre2api documentation. QUOTING @@ -10947,22 +11518,24 @@ QUOTING Note that white space inside \Q...\E is always treated as literal, even if PCRE2_EXTENDED is set, causing most other white space to be ignored. + Note also that PCRE2's handling of \Q...\E has some differences from + Perl's. See the pcre2pattern documentation for details. 
BRACED ITEMS - With one exception, wherever brace characters { and } are required to - enclose data for constructions such as \g{2} or \k{name}, space and/or - horizontal tab characters that follow { or precede } are allowed and + With one exception, wherever brace characters { and } are required to + enclose data for constructions such as \g{2} or \k{name}, space and/or + horizontal tab characters that follow { or precede } are allowed and are ignored. In the case of quantifiers, they may also appear before or - after the comma. The exception is \u{...} which is not Perl-compatible + after the comma. The exception is \u{...} which is not Perl-compatible and is recognized only when PCRE2_EXTRA_ALT_BSUX is set. This is an EC- MAScript compatibility feature, and follows ECMAScript's behaviour. ESCAPED CHARACTERS - This table applies to ASCII and Unicode environments. An unrecognized + This table applies to ASCII and Unicode environments. An unrecognized escape sequence causes an error. \a alarm, that is, the BEL character (hex 07) @@ -10979,6 +11552,11 @@ ESCAPED CHARACTERS \xhh character with hex code hh \x{hh..} character with hex code hh.. + \N{U+hh..} is synonymous with \x{hh..} but is not supported in environ- + ments that use EBCDIC code (mainly IBM mainframes). Note that \N not + followed by an opening curly bracket has a different meaning (see be- + low). + If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the following are also recognized: @@ -10986,20 +11564,17 @@ ESCAPED CHARACTERS \uhhhh character with hex code hhhh \u{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX - When \x is not followed by {, from zero to two hexadecimal digits are - read, but in ALT_BSUX mode \x must be followed by two hexadecimal dig- - its to be recognized as a hexadecimal escape; otherwise it matches a - literal "x". 
Likewise, if \u (in ALT_BSUX mode) is not followed by - four hexadecimal digits or (in EXTRA_ALT_BSUX mode) a sequence of hex - digits in curly brackets, it matches a literal "u". + When \x is not followed by {, one or two hexadecimal digits are read, + but in ALT_BSUX mode \x must be followed by two hexadecimal digits to + be recognized as a hexadecimal escape; otherwise it matches a literal + "x". Likewise, if \u (in ALT_BSUX mode) is not followed by four hexa- + decimal digits or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in + curly brackets, it matches a literal "u". Note that \0dd is always an octal code. The treatment of backslash fol- - lowed by a non-zero digit is complicated; for details see the section - "Non-printing characters" in the pcre2pattern documentation, where de- - tails of escape processing in EBCDIC environments are also given. - \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not supported in - EBCDIC environments. Note that \N not followed by an opening curly - bracket has a different meaning (see below). + lowed by a non-zero digit is complicated; for details see the section + "Non-printing characters" in the pcre2pattern documentation, where de- + tails of escape processing in EBCDIC environments are also given. CHARACTER TYPES @@ -11023,23 +11598,24 @@ CHARACTER TYPES \W a "non-word" character \X a Unicode extended grapheme cluster - \C is dangerous because it may leave the current matching point in the + \C is dangerous because it may leave the current matching point in the middle of a UTF-8 or UTF-16 character. The application can lock out the - use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also + use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 with the use of \C permanently disabled. 
- By default, \d, \s, and \w match only ASCII characters, even in UTF-8 + By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode or in the 16-bit and 32-bit libraries. However, if locale-specific - matching is happening, \s and \w may also match characters with code + matching is happening, \s and \w may also match characters with code points in the range 128-255. If the PCRE2_UCP option is set, the behav- iour of these escape sequences is changed to use Unicode properties and - they match many more characters, but there are some option settings - that can restrict individual sequences to matching only ASCII charac- + they match many more characters, but there are some option settings + that can restrict individual sequences to matching only ASCII charac- ters. Property descriptions in \p and \P are matched caselessly; hyphens, un- - derscores, and white space are ignored, in accordance with Unicode's - "loose matching" rules. + derscores, and ASCII white space characters are ignored, in accordance + with Unicode's "loose matching" rules. For example, \p{Bidi_Class=al} + is the same as \p{ bidi class = AL }. GENERAL CATEGORY PROPERTIES FOR \p and \P @@ -11052,13 +11628,13 @@ GENERAL CATEGORY PROPERTIES FOR \p and \P Cs Surrogate L Letter + Lc Cased letter, the union of Ll, Lu, and Lt + L& Synonym of Lc Ll Lower case letter Lm Modifier letter Lo Other letter Lt Title case letter Lu Upper case letter - Lc Ll, Lu, or Lt - L& Ll, Lu, or Lt M Mark Mc Spacing mark @@ -11090,6 +11666,9 @@ GENERAL CATEGORY PROPERTIES FOR \p and \P Zp Paragraph separator Zs Space separator + From release 10.45, when caseless matching is set, Ll, Lu, and Lt are + all equivalent to Lc. + PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P @@ -11106,9 +11685,9 @@ PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P BINARY PROPERTIES FOR \p AND \P - Unicode defines a number of binary properties, that is, properties - whose only values are true or false. 
You can obtain a list of those - that are recognized by \p and \P, along with their abbreviations, by + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by running this command: pcre2test -LP @@ -11116,8 +11695,8 @@ BINARY PROPERTIES FOR \p AND \P SCRIPT MATCHING WITH \p AND \P - Many script names and their 4-letter abbreviations are recognized in - \p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P + Many script names and their 4-letter abbreviations are recognized in + \p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of course). You can obtain a list of these scripts by running this com- mand: @@ -11153,7 +11732,7 @@ THE BIDI_CLASS PROPERTY FOR \p AND \P RLI right-to-left isolate RLO right-to-left override S segment separator - WS which space + WS white space CHARACTER CLASSES @@ -11179,10 +11758,50 @@ CHARACTER CLASSES word same as \w xdigit hexadecimal digit - In PCRE2, POSIX character set names recognize only ASCII characters by - default, but some of them use Unicode properties if PCRE2_UCP is set. + In PCRE2, POSIX character set names recognize only ASCII characters by + default, but some of them use Unicode properties if PCRE2_UCP is set. You can use \Q...\E inside a character class. + When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes + may be used, allowing nested character classes, combined using set op- + erators. + + [x&&[^y]] UTS#18 extended character class + + x||y set union (OR) + x&&y set intersection (AND) + x--y set difference (AND NOT) + x~~y set symmetric difference (XOR) + + +PERL EXTENDED CHARACTER CLASSES + + (?[...]) Perl extended character class + (?[\p{Thai} & \p{Nd}]) operators; whitespace ignored + (?[(x - y) & z]) parentheses for grouping + + (?[ [^3] & \p{Nd} ]) [...] 
is a nested ordinary class + (?[ [:alpha:] - [z] ]) POSIX set is allowed outside [...] + (?[ \d - [3] ]) backslash-escaped set is allowed outside + [...] + (?[ !\n & [:ascii:] ]) backslash-escaped character is allowed out- + side [...] + all other characters or ranges must be enclosed + in [...] + + x|y, x+y set union (OR) + x&y set intersection (AND) + x-y set difference (AND NOT) + x^y set symmetric difference (XOR) + !x set complement (NOT) + + Inside a Perl extended character class, [...] switches mode to be in- + terpreted as an ordinary character class. Outside of a nested [...], + the only items permitted are backslash-escapes, POSIX sets, operators, + and parentheses. Inside a nested ordinary class, ^ has its usual mean- + ing (inverts the class when used as the first character); outside of a + nested class, ^ is the XOR operator. + QUANTIFIERS @@ -11289,7 +11908,7 @@ OPTION SETTING (?^) unset imnrsx options (?aP) implies (?aT) as well, though this has no additional effect. How- - ever, it means that (?-aP) is really (?-PT) which disables all ASCII + ever, it means that (?-aP) also implies (?-aT) and disables all ASCII restrictions for POSIX classes. Unsetting x or xx unsets both. Several options may be set at once, and @@ -11299,20 +11918,25 @@ OPTION SETTING capture group, for example (?i:...). The following are recognized only at the very start of a pattern or af- - ter one of the newline or \R options with similar syntax. More than one - of them may appear. For the first three, d is a decimal number. - - (*LIMIT_DEPTH=d) set the backtracking limit to d - (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes - (*LIMIT_MATCH=d) set the match limit to d - (*NOTEMPTY) set PCRE2_NOTEMPTY when matching - (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching - (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) + ter one of the newline or \R sequences or options with similar syntax. + More than one of them may appear. 
For the first three, d is a decimal + number. + + (*LIMIT_DEPTH=d) set the backtracking limit to d + (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes + (*LIMIT_MATCH=d) set the match limit to d + (*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching + (*NOTEMPTY) set PCRE2_NOTEMPTY when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR) - (*NO_JIT) disable JIT optimization - (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) - (*UTF) set appropriate UTF mode for the library in use - (*UCP) set PCRE2_UCP (use Unicode properties for \d etc) + (*NO_JIT) disable JIT optimization + (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OP- + TIMIZE) + (*TURKISH_CASING) set PCRE2_EXTRA_TURKISH_CASING when matching + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE2_UCP (use Unicode properties for \d + etc) Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of the limits set by the caller of pcre2_match() or @@ -11383,6 +12007,22 @@ NON-ATOMIC LOOKAROUND ASSERTIONS (*non_atomic_positive_lookbehind:...) ) +SUBSTRING SCAN ASSERTION + This feature is not Perl-compatible. + + (*scan_substring:(grouplist)...) scan captured substring + (*scs:(grouplist)...) scan captured substring + + The comma-separated list may identify groups in any of the following + ways: + + n absolute reference + +n relative reference + -n relative reference + <name> name + 'name' name + + SCRIPT RUNS (*script_run:...) ) script run, can be backtracked into @@ -11444,16 +12084,16 @@ CONDITIONAL PATTERNS (?(VERSION[>]=n.m) test PCRE2 version (?(assert) assertion condition - Note the ambiguity of (?(R) and (?(Rn) which might be named reference - conditions or recursion tests.
Such a condition is interpreted as a + Note the ambiguity of (?(R) and (?(Rn) which might be named reference + conditions or recursion tests. Such a condition is interpreted as a reference condition if the relevant named group exists. BACKTRACKING CONTROL - All backtracking control verbs may be in the form (*VERB:NAME). For - (*MARK) the name is mandatory, for the others it is optional. (*SKIP) - changes its behaviour if :NAME is present. The others just set a name + All backtracking control verbs may be in the form (*VERB:NAME). For + (*MARK) the name is mandatory, for the others it is optional. (*SKIP) + changes its behaviour if :NAME is present. The others just set a name for passing back to the caller, but this is not a name that (*SKIP) can see. The following act immediately they are reached: @@ -11461,7 +12101,7 @@ BACKTRACKING CONTROL (*FAIL) force backtrack; synonym (*F) (*MARK:NAME) set name to be passed back; synonym (*:NAME) - The following act only when a subsequent match failure causes a back- + The following act only when a subsequent match failure causes a back- track to reach them. They all force a match failure, but they differ in what happens afterwards. Those that advance the start-of-match point do so only if the pattern is not anchored. @@ -11473,7 +12113,7 @@ BACKTRACKING CONTROL (*MARK:NAME); if not found, the (*SKIP) is ignored (*THEN) local failure, backtrack to next alternation - The effect of one of these verbs in a group called as a subroutine is + The effect of one of these verbs in a group called as a subroutine is confined to the subroutine call. @@ -11484,14 +12124,61 @@ CALLOUTS (?C"text") callout with string data The allowed string delimiters are ` ' " ^ % # $ (which are the same for - the start and the end), and the starting delimiter { matched with the - ending delimiter }. To encode the ending delimiter within the string, + the start and the end), and the starting delimiter { matched with the + ending delimiter }. 
To encode the ending delimiter within the string, double it. +REPLACEMENT STRINGS + + If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for + pcre2_substitute() is not interpreted. Otherwise, by default, the only + special character is the dollar character in one of the following + forms: + + $$ insert a dollar character + $n or ${n} insert the contents of group n + $<name> insert the contents of named group + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string + $*MARK or ${*MARK} insert a control verb name + + For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is + set, there is additional interpretation: + + 1. Backslash is an escape character, and the forms described in "ES- + CAPED CHARACTERS" above are recognized. Also: + + \Q...\E can be used to suppress interpretation + \l force the next character to lower case + \u force the next character to upper case + \L force subsequent characters to lower case + \U force subsequent characters to upper case + \u\L force next character to upper case, then all lower + \l\U force next character to lower case, then all upper + \E end \L or \U case forcing + \b backspace character (note: as in character class in pat- + tern) + \v vertical tab character (note: not the same as in a pattern) + + 2. The Python form \g<n>, where the angle brackets are part of the syn- + tax and n is either a group name or a number, is recognized as an al- + ternative way of inserting the contents of a group, for example \g<3>. + + 3. Capture substitution supports the following additional forms: + + ${n:-string} default for unset group + ${n:+string1:string2} values for set/unset group + + The substitution strings themselves are expanded. Backslash can be used + to escape colons and closing curly brackets.
+ + SEE ALSO - pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3), + pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2(3). @@ -11504,20 +12191,19 @@ AUTHOR REVISION - Last updated: 12 October 2023 - Copyright (c) 1997-2023 University of Cambridge. + Last updated: 27 November 2024 + Copyright (c) 1997-2024 University of Cambridge. -PCRE2 10.43 12 October 2023 PCRE2SYNTAX(3) +PCRE2 10.45-RC1 27 November 2024 PCRE2SYNTAX(3) ------------------------------------------------------------------------------ - PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) NAME - PCRE - Perl-compatible regular expressions (revised API) + PCRE2 - Perl-compatible regular expressions (revised API) UNICODE AND UTF SUPPORT @@ -11554,7 +12240,7 @@ UNICODE PROPERTY SUPPORT ting. The Unicode properties that can be tested are a subset of those that Perl supports. Currently they are limited to the general category properties such as Lu for an upper case letter or Nd for a decimal num- - ber, the derived properties Any and LC (synonym L&), the Unicode script + ber, the derived properties Any and Lc (synonym L&), the Unicode script names such as Arabic or Han, Bidi_Class, Bidi_Control, and a few binary properties. @@ -11647,173 +12333,203 @@ UNICODE CASE-EQUIVALENCE in a case equivalence must either be ASCII or non-ASCII; there can be no mixing. + Without PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' = U+212A (Kelvin sign) + 's' = 'S' = U+017F (long S) + With PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' + U+212A (Kelvin sign) only case-equivalent to itself + 's' = 'S' + U+017F (long S) only case-equivalent to itself + + One language family, Turkish and Azeri, has its own case-insensitivity + rules, which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. + This alters the behaviour of the 'i', 'I', U+0130 (capital I with dot + above), and U+0131 (small dotless i) characters. 
+ + Without PCRE2_EXTRA_TURKISH_CASING: + 'i' = 'I' + U+0130 (capital I with dot above) only case-equivalent to itself + U+0131 (small dotless i) only case-equivalent to itself + With PCRE2_EXTRA_TURKISH_CASING: + 'i' = U+0130 (capital I with dot above) + U+0131 (small dotless i) = 'I' + + It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and + PCRE2_EXTRA_TURKISH_CASING together. + + From release 10.45 the Unicode letter properties Lu (upper case), Ll + (lower case), and Lt (title case) are all treated as Lc (cased letter) + when caseless matching is set by the PCRE2_CASELESS option or (?i) + within the pattern. + SCRIPT RUNS - The pattern constructs (*script_run:...) and (*atomic_script_run:...), - with synonyms (*sr:...) and (*asr:...), verify that the string matched - within the parentheses is a script run. In concept, a script run is a - sequence of characters that are all from the same Unicode script. How- + The pattern constructs (*script_run:...) and (*atomic_script_run:...), + with synonyms (*sr:...) and (*asr:...), verify that the string matched + within the parentheses is a script run. In concept, a script run is a + sequence of characters that are all from the same Unicode script. How- ever, because some scripts are commonly used together, and because some - diacritical and other marks are used with multiple scripts, it is not + diacritical and other marks are used with multiple scripts, it is not that simple. Every Unicode character has a Script property, mostly with a value cor- - responding to the name of a script, such as Latin, Greek, or Cyrillic. + responding to the name of a script, such as Latin, Greek, or Cyrillic. There are also three special values: "Unknown" is used for code points that have not been assigned, and also - for the surrogate code points. 
In the PCRE2 32-bit library, characters - whose code points are greater than the Unicode maximum (U+10FFFF), - which are accessible only in non-UTF mode, are assigned the Unknown + for the surrogate code points. In the PCRE2 32-bit library, characters + whose code points are greater than the Unicode maximum (U+10FFFF), + which are accessible only in non-UTF mode, are assigned the Unknown script. - "Common" is used for characters that are used with many scripts. These - include punctuation, emoji, mathematical, musical, and currency sym- + "Common" is used for characters that are used with many scripts. These + include punctuation, emoji, mathematical, musical, and currency sym- bols, and the ASCII digits 0 to 9. - "Inherited" is used for characters such as diacritical marks that mod- + "Inherited" is used for characters such as diacritical marks that mod- ify a previous character. These are considered to take on the script of the character that they modify. - Some Inherited characters are used with many scripts, but many of them - are only normally used with a small number of scripts. For example, + Some Inherited characters are used with many scripts, but many of them + are only normally used with a small number of scripts. For example, U+102E0 (Coptic Epact thousands mark) is used only with Arabic and Cop- - tic. In order to make it possible to check this, a Unicode property + tic. In order to make it possible to check this, a Unicode property called Script Extension exists. Its value is a list of scripts that ap- ply to the character. For the majority of characters, the list contains - just one script, the same one as the Script property. However, for - characters such as U+102E0 more than one Script is listed. There are - also some Common characters that have a single, non-Common script in + just one script, the same one as the Script property. However, for + characters such as U+102E0 more than one Script is listed. 
There are + also some Common characters that have a single, non-Common script in their Script Extension list. The next section describes the basic rules for deciding whether a given - string of characters is a script run. Note, however, that there are - some special cases involving the Chinese Han script, and an additional - constraint for decimal digits. These are covered in subsequent sec- + string of characters is a script run. Note, however, that there are + some special cases involving the Chinese Han script, and an additional + constraint for decimal digits. These are covered in subsequent sec- tions. Basic script run rules A string that is less than two characters long is a script run. This is - the only case in which an Unknown character can be part of a script - run. Longer strings are checked using only the Script Extensions prop- + the only case in which an Unknown character can be part of a script + run. Longer strings are checked using only the Script Extensions prop- erty, not the basic Script property. - If a character's Script Extension property is the single value "Inher- + If a character's Script Extension property is the single value "Inher- ited", it is always accepted as part of a script run. This is also true - for the property "Common", subject to the checking of decimal digits + for the property "Common", subject to the checking of decimal digits described below. All the remaining characters in a script run must have - at least one script in common in their Script Extension lists. In set- + at least one script in common in their Script Extension lists. In set- theoretic terminology, the intersection of all the sets of scripts must not be empty. - A simple example is an Internet name such as "google.com". The letters + A simple example is an Internet name such as "google.com". The letters are all in the Latin script, and the dot is Common, so this string is a script run. 
However, the Cyrillic letter "o" looks exactly the same as - the Latin "o"; a string that looks the same, but with Cyrillic "o"s is + the Latin "o"; a string that looks the same, but with Cyrillic "o"s is not a script run. - More interesting examples involve characters with more than one script + More interesting examples involve characters with more than one script in their Script Extension. Consider the following characters: U+060C Arabic comma U+06D4 Arabic full stop - The first has the Script Extension list Arabic, Hanifi Rohingya, Syr- - iac, and Thaana; the second has just Arabic and Hanifi Rohingya. Both - of them could appear in script runs of either Arabic or Hanifi Ro- - hingya. The first could also appear in Syriac or Thaana script runs, + The first has the Script Extension list Arabic, Hanifi Rohingya, Syr- + iac, and Thaana; the second has just Arabic and Hanifi Rohingya. Both + of them could appear in script runs of either Arabic or Hanifi Ro- + hingya. The first could also appear in Syriac or Thaana script runs, but the second could not. The Chinese Han script - The Chinese Han script is commonly used in conjunction with other - scripts for writing certain languages. Japanese uses the Hiragana and - Katakana scripts together with Han; Korean uses Hangul and Han; Tai- - wanese Mandarin uses Bopomofo and Han. These three combinations are - treated as special cases when checking script runs and are, in effect, - "virtual scripts". Thus, a script run may contain a mixture of Hira- - gana, Katakana, and Han, or a mixture of Hangul and Han, or a mixture - of Bopomofo and Han, but not, for example, a mixture of Hangul and - Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical Stan- - dard 39 ("Unicode Security Mechanisms", http://unicode.org/re- + The Chinese Han script is commonly used in conjunction with other + scripts for writing certain languages. 
Japanese uses the Hiragana and + Katakana scripts together with Han; Korean uses Hangul and Han; Tai- + wanese Mandarin uses Bopomofo and Han. These three combinations are + treated as special cases when checking script runs and are, in effect, + "virtual scripts". Thus, a script run may contain a mixture of Hira- + gana, Katakana, and Han, or a mixture of Hangul and Han, or a mixture + of Bopomofo and Han, but not, for example, a mixture of Hangul and + Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical Stan- + dard 39 ("Unicode Security Mechanisms", http://unicode.org/re- + ports/tr39/) in allowing such mixtures. Decimal digits - Unicode contains many sets of 10 decimal digits in different scripts, - and some scripts (including the Common script) contain more than one - set. Some of these decimal digits them are visually indistinguishable - from the common ASCII digits. In addition to the script checking de- - scribed above, if a script run contains any decimal digits, they must + Unicode contains many sets of 10 decimal digits in different scripts, + and some scripts (including the Common script) contain more than one + set. Some of these decimal digits are visually indistinguishable + from the common ASCII digits. In addition to the script checking de- + scribed above, if a script run contains any decimal digits, they must all come from the same set of 10 adjacent characters. VALIDITY OF UTF STRINGS - When the PCRE2_UTF option is set, the strings passed as patterns and + When the PCRE2_UTF option is set, the strings passed as patterns and subjects are (by default) checked for validity on entry to the relevant functions. If an invalid UTF string is passed, a negative error code is - returned.
The code unit offset to the offending character can be ex- + tracted from the match data block by calling pcre2_get_startchar(), which is used for this purpose after a UTF error. - In some situations, you may already know that your strings are valid, - and therefore want to skip these checks in order to improve perfor- - mance, for example in the case of a long subject string that is being - scanned repeatedly. If you set the PCRE2_NO_UTF_CHECK option at com- - pile time or at match time, PCRE2 assumes that the pattern or subject + In some situations, you may already know that your strings are valid, + and therefore want to skip these checks in order to improve perfor- + mance, for example in the case of a long subject string that is being + scanned repeatedly. If you set the PCRE2_NO_UTF_CHECK option at com- + pile time or at match time, PCRE2 assumes that the pattern or subject it is given (respectively) contains only valid UTF code unit sequences. - If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the - result is undefined and your program may crash or loop indefinitely or - give incorrect results. There is, however, one mode of matching that - can handle invalid UTF subject strings. This is enabled by passing - PCRE2_MATCH_INVALID_UTF to pcre2_compile() and is discussed below in - the next section. The rest of this section covers the case when + If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the + result is undefined and your program may crash or loop indefinitely or + give incorrect results. There is, however, one mode of matching that + can handle invalid UTF subject strings. This is enabled by passing + PCRE2_MATCH_INVALID_UTF to pcre2_compile() and is discussed below in + the next section. The rest of this section covers the case when PCRE2_MATCH_INVALID_UTF is not set. - Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the UTF - check for the pattern; it does not also apply to subject strings. 
If - you want to disable the check for a subject string you must pass this + Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the UTF + check for the pattern; it does not also apply to subject strings. If + you want to disable the check for a subject string you must pass this same option to pcre2_match() or pcre2_dfa_match(). UTF-16 and UTF-32 strings can indicate their endianness by special code - knows as a byte-order mark (BOM). The PCRE2 functions do not handle + known as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting strings to be in host byte order. - Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any + Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other processing takes place. In the case of pcre2_match() and - pcre2_dfa_match() calls with a non-zero starting offset, the check is + pcre2_dfa_match() calls with a non-zero starting offset, the check is applied only to that part of the subject that could be inspected during - matching, and there is a check that the starting offset points to the - first code unit of a character or to the end of the subject. If there - are no lookbehind assertions in the pattern, the check starts at the - starting offset. Otherwise, it starts at the length of the longest - lookbehind before the starting offset, or at the start of the subject - if there are not that many characters before the starting offset. Note + matching, and there is a check that the starting offset points to the + first code unit of a character or to the end of the subject. If there + are no lookbehind assertions in the pattern, the check starts at the + starting offset. Otherwise, it starts at the length of the longest + lookbehind before the starting offset, or at the start of the subject + if there are not that many characters before the starting offset. Note that the sequences \b and \B are one-character lookbehinds.
- In addition to checking the format of the string, there is a check to + In addition to checking the format of the string, there is a check to ensure that all code points lie in the range U+0 to U+10FFFF, excluding - the surrogate area. The so-called "non-character" code points are not + the surrogate area. The so-called "non-character" code points are not excluded because Unicode corrigendum #9 makes it clear that they should not be. - Characters in the "Surrogate Area" of Unicode are reserved for use by - UTF-16, where they are used in pairs to encode code points with values - greater than 0xFFFF. The code points that are encoded by UTF-16 pairs - are available independently in the UTF-8 and UTF-32 encodings. (In - other words, the whole surrogate thing is a fudge for UTF-16 which un- + Characters in the "Surrogate Area" of Unicode are reserved for use by + UTF-16, where they are used in pairs to encode code points with values + greater than 0xFFFF. The code points that are encoded by UTF-16 pairs + are available independently in the UTF-8 and UTF-32 encodings. (In + other words, the whole surrogate thing is a fudge for UTF-16 which un- fortunately messes up UTF-8 and UTF-32.) - Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error - that is given if an escape sequence for an invalid Unicode code point - is encountered in the pattern. If you want to allow escape sequences - such as \x{d800} (a surrogate code point) you can set the PCRE2_EX- - TRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible - only in UTF-8 and UTF-32 modes, because these values are not repre- + Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error + that is given if an escape sequence for an invalid Unicode code point + is encountered in the pattern. If you want to allow escape sequences + such as \x{d800} (a surrogate code point) you can set the PCRE2_EX- + TRA_ALLOW_SURROGATE_ESCAPES extra option. 
However, this is possible + only in UTF-8 and UTF-32 modes, because these values are not repre- sentable in UTF-16. Errors in UTF-8 strings @@ -11826,10 +12542,10 @@ VALIDITY OF UTF STRINGS PCRE2_ERROR_UTF8_ERR4 PCRE2_ERROR_UTF8_ERR5 - The string ends with a truncated UTF-8 character; the code specifies - how many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 - characters to be no longer than 4 bytes, the encoding scheme (origi- - nally defined by RFC 2279) allows for up to 6 bytes, and this is + The string ends with a truncated UTF-8 character; the code specifies + how many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 + characters to be no longer than 4 bytes, the encoding scheme (origi- + nally defined by RFC 2279) allows for up to 6 bytes, and this is checked first; hence the possibility of 4 or 5 missing bytes. PCRE2_ERROR_UTF8_ERR6 @@ -11839,13 +12555,13 @@ VALIDITY OF UTF STRINGS PCRE2_ERROR_UTF8_ERR10 The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of - the character do not have the binary value 0b10 (that is, either the + the character do not have the binary value 0b10 (that is, either the most significant bit is 0, or the next bit is 1). PCRE2_ERROR_UTF8_ERR11 PCRE2_ERROR_UTF8_ERR12 - A character that is valid by the RFC 2279 rules is either 5 or 6 bytes + A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long; these code points are excluded by RFC 3629. PCRE2_ERROR_UTF8_ERR13 @@ -11855,8 +12571,8 @@ VALIDITY OF UTF STRINGS PCRE2_ERROR_UTF8_ERR14 - A 3-byte character has a value in the range 0xd800 to 0xdfff; this - range of code points are reserved by RFC 3629 for use with UTF-16, and + A 3-byte character has a value in the range 0xd800 to 0xdfff; this + range of code points are reserved by RFC 3629 for use with UTF-16, and so are excluded from UTF-8. 
PCRE2_ERROR_UTF8_ERR15 @@ -11865,26 +12581,26 @@ VALIDITY OF UTF STRINGS PCRE2_ERROR_UTF8_ERR18 PCRE2_ERROR_UTF8_ERR19 - A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes - for a value that can be represented by fewer bytes, which is invalid. - For example, the two bytes 0xc0, 0xae give the value 0x2e, whose cor- + A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes + for a value that can be represented by fewer bytes, which is invalid. + For example, the two bytes 0xc0, 0xae give the value 0x2e, whose cor- rect coding uses just one byte. PCRE2_ERROR_UTF8_ERR20 The two most significant bits of the first byte of a character have the - binary value 0b10 (that is, the most significant bit is 1 and the sec- - ond is 0). Such a byte can only validly occur as the second or subse- + binary value 0b10 (that is, the most significant bit is 1 and the sec- + ond is 0). Such a byte can only validly occur as the second or subse- quent byte of a multi-byte character. PCRE2_ERROR_UTF8_ERR21 - The first byte of a character has the value 0xfe or 0xff. These values + The first byte of a character has the value 0xfe or 0xff. These values can never occur in a valid UTF-8 string. Errors in UTF-16 strings - The following negative error codes are given for invalid UTF-16 + The following negative error codes are given for invalid UTF-16 strings: PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string @@ -11894,7 +12610,7 @@ VALIDITY OF UTF STRINGS Errors in UTF-32 strings - The following negative error codes are given for invalid UTF-32 + The following negative error codes are given for invalid UTF-32 strings: PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff) @@ -11904,60 +12620,60 @@ VALIDITY OF UTF STRINGS MATCHING IN INVALID UTF STRINGS You can run pattern matches on subject strings that may contain invalid - UTF sequences if you call pcre2_compile() with the PCRE2_MATCH_IN- - VALID_UTF option. 
This is supported by pcre2_match(), including JIT + UTF sequences if you call pcre2_compile() with the PCRE2_MATCH_IN- + VALID_UTF option. This is supported by pcre2_match(), including JIT matching, but not by pcre2_dfa_match(). When PCRE2_MATCH_INVALID_UTF is - set, it forces PCRE2_UTF to be set as well. Note, however, that the + set, it forces PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a valid UTF string. - If you do not set PCRE2_MATCH_INVALID_UTF when calling pcre2_compile, - and you are not certain that your subject strings are valid UTF se- - quences, you should not make use of the JIT "fast path" function - pcre2_jit_match() because it bypasses sanity checks, including the one - for UTF validity. An invalid string may cause undefined behaviour, in- + If you do not set PCRE2_MATCH_INVALID_UTF when calling pcre2_compile, + and you are not certain that your subject strings are valid UTF se- + quences, you should not make use of the JIT "fast path" function + pcre2_jit_match() because it bypasses sanity checks, including the one + for UTF validity. An invalid string may cause undefined behaviour, in- cluding looping, crashing, or giving the wrong answer. - Setting PCRE2_MATCH_INVALID_UTF does not affect what pcre2_compile() - generates, but if pcre2_jit_compile() is subsequently called, it does + Setting PCRE2_MATCH_INVALID_UTF does not affect what pcre2_compile() + generates, but if pcre2_jit_compile() is subsequently called, it does generate different code. If JIT is not used, the option affects the be- haviour of the interpretive code in pcre2_match(). When PCRE2_MATCH_IN- - VALID_UTF is set at compile time, PCRE2_NO_UTF_CHECK is ignored at + VALID_UTF is set at compile time, PCRE2_NO_UTF_CHECK is ignored at match time. - In this mode, an invalid code unit sequence in the subject never - matches any pattern item. It does not match dot, it does not match - \p{Any}, it does not even match negative items such as [^X]. 
A lookbe- - hind assertion fails if it encounters an invalid sequence while moving - the current point backwards. In other words, an invalid UTF code unit + In this mode, an invalid code unit sequence in the subject never + matches any pattern item. It does not match dot, it does not match + \p{Any}, it does not even match negative items such as [^X]. A lookbe- + hind assertion fails if it encounters an invalid sequence while moving + the current point backwards. In other words, an invalid UTF code unit sequence acts as a barrier which no match can cross. You can also think of this as the subject being split up into fragments - of valid UTF, delimited internally by invalid code unit sequences. The - pattern is matched fragment by fragment. The result of a successful - match, however, is given as code unit offsets in the entire subject + of valid UTF, delimited internally by invalid code unit sequences. The + pattern is matched fragment by fragment. The result of a successful + match, however, is given as code unit offsets in the entire subject string in the usual way. There are a few points to consider: - The internal boundaries are not interpreted as the beginnings or ends - of lines and so do not match circumflex or dollar characters in the + The internal boundaries are not interpreted as the beginnings or ends + of lines and so do not match circumflex or dollar characters in the pattern. - If pcre2_match() is called with an offset that points to an invalid - UTF-sequence, that sequence is skipped, and the match starts at the + If pcre2_match() is called with an offset that points to an invalid + UTF-sequence, that sequence is skipped, and the match starts at the next valid UTF character, or the end of the subject. At internal fragment boundaries, \b and \B behave in the same way as at - the beginning and end of the subject. 
For example, a sequence such as - \bWORD\b would match an instance of WORD that is surrounded by invalid + the beginning and end of the subject. For example, a sequence such as + \bWORD\b would match an instance of WORD that is surrounded by invalid UTF code units. - Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbi- - trary data, knowing that any matched strings that are returned are + Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbi- + trary data, knowing that any matched strings that are returned are valid UTF. This can be useful when searching for UTF text in executable or other binary files. - Note, however, that the 16-bit and 32-bit PCRE2 libraries process - strings as sequences of uint16_t or uint32_t code points. They cannot - find valid UTF sequences within an arbitrary string of bytes unless + Note, however, that the 16-bit and 32-bit PCRE2 libraries process + strings as sequences of uint16_t or uint32_t code points. They cannot + find valid UTF sequences within an arbitrary string of bytes unless such sequences are suitably aligned. @@ -11970,11 +12686,11 @@ AUTHOR REVISION - Last updated: 12 October 2023 - Copyright (c) 1997-2023 University of Cambridge. + Last updated: 27 November 2024 + Copyright (c) 1997-2024 University of Cambridge. 
-PCRE2 10.43 04 February 2023 PCRE2UNICODE(3) +PCRE2 10.45-RC1 27 November 2024 PCRE2UNICODE(3) ------------------------------------------------------------------------------ diff --git a/doc/pcre2_callout_enumerate.3 b/doc/pcre2_callout_enumerate.3 index 109c9be..746a073 100644 --- a/doc/pcre2_callout_enumerate.3 +++ b/doc/pcre2_callout_enumerate.3 @@ -1,4 +1,4 @@ -.TH PCRE2_COMPILE 3 "23 March 2017" "PCRE2 10.30" +.TH PCRE2_COMPILE 3 "23 March 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_code_copy.3 b/doc/pcre2_code_copy.3 index 09b4705..b022d88 100644 --- a/doc/pcre2_code_copy.3 +++ b/doc/pcre2_code_copy.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23" +.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_code_copy_with_tables.3 b/doc/pcre2_code_copy_with_tables.3 index cfbddb3..f6a8672 100644 --- a/doc/pcre2_code_copy_with_tables.3 +++ b/doc/pcre2_code_copy_with_tables.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23" +.TH PCRE2_CODE_COPY 3 "16 January 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_code_free.3 b/doc/pcre2_code_free.3 index 9e0ad3c..125209f 100644 --- a/doc/pcre2_code_free.3 +++ b/doc/pcre2_code_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_compile.3 b/doc/pcre2_compile.3 index 151a703..ef742d9 100644 --- a/doc/pcre2_compile.3 +++ b/doc/pcre2_compile.3 @@ -1,4 +1,4 @@ -.TH PCRE2_COMPILE 3 "19 January 2024" "PCRE2 10.43" +.TH PCRE2_COMPILE 3 "30 October 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH 
SYNOPSIS @@ -45,6 +45,7 @@ The primary option bits are: PCRE2_ALLOW_EMPTY_CLASS Allow empty classes PCRE2_ALT_BSUX Alternative handling of \eu, \eU, and \ex PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode + PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax PCRE2_ALT_VERBNAMES Process backslashes in verb names PCRE2_AUTO_CALLOUT Compile automatic callouts PCRE2_CASELESS Do caseless matching diff --git a/doc/pcre2_compile_context_copy.3 b/doc/pcre2_compile_context_copy.3 index aea1187..af2fa9a 100644 --- a/doc/pcre2_compile_context_copy.3 +++ b/doc/pcre2_compile_context_copy.3 @@ -1,4 +1,4 @@ -.TH PCRE2_COMPILE_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_COMPILE_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_compile_context_create.3 b/doc/pcre2_compile_context_create.3 index 3053df4..e10493e 100644 --- a/doc/pcre2_compile_context_create.3 +++ b/doc/pcre2_compile_context_create.3 @@ -1,4 +1,4 @@ -.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_compile_context_free.3 b/doc/pcre2_compile_context_free.3 index e90d744..4d12538 100644 --- a/doc/pcre2_compile_context_free.3 +++ b/doc/pcre2_compile_context_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_COMPILE_CONTEXT_FREE 3 "29 June 2018" "PCRE2 10.32" +.TH PCRE2_COMPILE_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_config.3 b/doc/pcre2_config.3 index ab9623d..37bc6da 100644 --- a/doc/pcre2_config.3 +++ b/doc/pcre2_config.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.31" +.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular 
expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_convert_context_copy.3 b/doc/pcre2_convert_context_copy.3 index 827c3e9..eea5e31 100644 --- a/doc/pcre2_convert_context_copy.3 +++ b/doc/pcre2_convert_context_copy.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CONVERT_CONTEXT_COPY 3 "10 July 2017" "PCRE2 10.30" +.TH PCRE2_CONVERT_CONTEXT_COPY 3 "12 July 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_convert_context_create.3 b/doc/pcre2_convert_context_create.3 index 91c17fb..eab08c8 100644 --- a/doc/pcre2_convert_context_create.3 +++ b/doc/pcre2_convert_context_create.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "10 July 2017" "PCRE2 10.30" +.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "12 July 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_convert_context_free.3 b/doc/pcre2_convert_context_free.3 index 3fd5783..929b53e 100644 --- a/doc/pcre2_convert_context_free.3 +++ b/doc/pcre2_convert_context_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CONVERT_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_CONVERT_CONTEXT_FREE 3 "13 August 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_converted_pattern_free.3 b/doc/pcre2_converted_pattern_free.3 index b0645b5..ceb2bcc 100644 --- a/doc/pcre2_converted_pattern_free.3 +++ b/doc/pcre2_converted_pattern_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_CONVERTED_PATTERN_FREE 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_CONVERTED_PATTERN_FREE 3 "13 August 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_dfa_match.3 b/doc/pcre2_dfa_match.3 index ea87aac..ef59838 100644 --- a/doc/pcre2_dfa_match.3 +++ b/doc/pcre2_dfa_match.3 @@ -1,4 +1,4 @@ -.TH PCRE2_DFA_MATCH 3 "28 August 2021" "PCRE2 10.38" +.TH PCRE2_DFA_MATCH 3 "31 August 2021" "PCRE2 10.45-RC1" 
.SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_general_context_copy.3 b/doc/pcre2_general_context_copy.3 index 637e565..57b34fe 100644 --- a/doc/pcre2_general_context_copy.3 +++ b/doc/pcre2_general_context_copy.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GENERAL_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_GENERAL_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_general_context_create.3 b/doc/pcre2_general_context_create.3 index e204a95..a6f3a0c 100644 --- a/doc/pcre2_general_context_create.3 +++ b/doc/pcre2_general_context_create.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "23 January 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_general_context_free.3 b/doc/pcre2_general_context_free.3 index df1aa1f..c253ea5 100644 --- a/doc/pcre2_general_context_free.3 +++ b/doc/pcre2_general_context_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_get_error_message.3 b/doc/pcre2_get_error_message.3 index 3d3e0de..c7b1171 100644 --- a/doc/pcre2_get_error_message.3 +++ b/doc/pcre2_get_error_message.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.30" +.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_get_mark.3 b/doc/pcre2_get_mark.3 index dce377d..cb38a57 100644 --- a/doc/pcre2_get_mark.3 +++ b/doc/pcre2_get_mark.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GET_MARK 3 "13 October 2017" "PCRE2 10.31" +.TH PCRE2_GET_MARK 3 "13 January 2018" 
"PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_get_match_data_heapframes_size.3 b/doc/pcre2_get_match_data_heapframes_size.3 index 1eba42b..4ecdfdd 100644 --- a/doc/pcre2_get_match_data_heapframes_size.3 +++ b/doc/pcre2_get_match_data_heapframes_size.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "13 January 2023" "PCRE2 10.43" +.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "18 January 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_get_match_data_size.3 b/doc/pcre2_get_match_data_size.3 index cf5fa5e..3c3e0d6 100644 --- a/doc/pcre2_get_match_data_size.3 +++ b/doc/pcre2_get_match_data_size.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GET_MATCH_DATA_SIZE 3 "16 July 2019" "PCRE2 10.34" +.TH PCRE2_GET_MATCH_DATA_SIZE 3 "17 October 2019" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_get_ovector_count.3 b/doc/pcre2_get_ovector_count.3 index 3f6d748..38e1899 100644 --- a/doc/pcre2_get_ovector_count.3 +++ b/doc/pcre2_get_ovector_count.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GET_OVECTOR_COUNT 3 "24 October 2014" "PCRE2 10.00" +.TH PCRE2_GET_OVECTOR_COUNT 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_get_ovector_pointer.3 b/doc/pcre2_get_ovector_pointer.3 index 261d652..685f6d4 100644 --- a/doc/pcre2_get_ovector_pointer.3 +++ b/doc/pcre2_get_ovector_pointer.3 @@ -1,4 +1,4 @@ -.TH PCRE2_GET_OVECTOR_POINTER 3 "24 October 2014" "PCRE2 10.00" +.TH PCRE2_GET_OVECTOR_POINTER 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_get_startchar.3 b/doc/pcre2_get_startchar.3 index c6ac8b0..c83e852 100644 --- a/doc/pcre2_get_startchar.3 +++ b/doc/pcre2_get_startchar.3 @@ -1,4 +1,4 @@ -.TH 
PCRE2_GET_STARTCHAR 3 "24 October 2014" "PCRE2 10.00" +.TH PCRE2_GET_STARTCHAR 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_jit_compile.3 b/doc/pcre2_jit_compile.3 index 6cc1788..f32c2a1 100644 --- a/doc/pcre2_jit_compile.3 +++ b/doc/pcre2_jit_compile.3 @@ -1,4 +1,4 @@ -.TH PCRE2_JIT_COMPILE 3 "29 July 2019" "PCRE2 10.34" +.TH PCRE2_JIT_COMPILE 3 "22 August 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -22,9 +22,17 @@ details are given in the .\" documentation. .P -The first argument is a pointer that was returned by a successful call to -\fBpcre2_compile()\fP, and the second must contain one or more of the following -bits: +The availability of JIT support can be tested by calling +\fBpcre2_jit_compile()\fP with a single option PCRE2_JIT_TEST_ALLOC (the +code argument is ignored, so a NULL value is accepted). Such a call +returns zero if JIT is available and has a working allocator. Otherwise +it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate +executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not +compiled in. +.P +Otherwise, the first argument must be a pointer that was returned by a +successful call to \fBpcre2_compile()\fP, and the second must contain one or +more of the following bits: .sp PCRE2_JIT_COMPLETE compile code for full matching PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching @@ -34,11 +42,13 @@ There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old option is deprecated and may be removed in the future. .P -The yield of the function is 0 for success, or a negative error code otherwise. -In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or -if an unknown bit is set in \fIoptions\fP.
The function can also return -PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the -compiler, even if it was because of a system security restriction. +The yield of the function when called with any of the three options above is 0 +for success, or a negative error code otherwise. In particular, +PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown +bit is set in \fIoptions\fP. The function can also return PCRE2_ERROR_NOMEMORY +if JIT is unable to allocate executable memory for the compiler, even if it was +because of a system security restriction. In a few cases, the function may +return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features. .P There is a complete description of the PCRE2 native API in the .\" HREF diff --git a/doc/pcre2_jit_free_unused_memory.3 b/doc/pcre2_jit_free_unused_memory.3 index 183bba0..cc4fee4 100644 --- a/doc/pcre2_jit_free_unused_memory.3 +++ b/doc/pcre2_jit_free_unused_memory.3 @@ -1,4 +1,4 @@ -.TH PCRE2_JIT_FREE_UNUSED_MEMORY 3 "27 October 2014" "PCRE2 10.00" +.TH PCRE2_JIT_FREE_UNUSED_MEMORY 3 "24 April 2020" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_jit_match.3 b/doc/pcre2_jit_match.3 index 72c4bc1..d4734e7 100644 --- a/doc/pcre2_jit_match.3 +++ b/doc/pcre2_jit_match.3 @@ -1,4 +1,4 @@ -.TH PCRE2_JIT_MATCH 3 "20 January 2023" "PCRE2 10.43" +.TH PCRE2_JIT_MATCH 3 "20 January 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_jit_stack_assign.3 b/doc/pcre2_jit_stack_assign.3 index 33d2e1c..27f6780 100644 --- a/doc/pcre2_jit_stack_assign.3 +++ b/doc/pcre2_jit_stack_assign.3 @@ -1,4 +1,4 @@ -.TH PCRE2_JIT_STACK_ASSIGN 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_JIT_STACK_ASSIGN 3 "13 August 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_jit_stack_create.3 
b/doc/pcre2_jit_stack_create.3 index 74cdb1d..52c7464 100644 --- a/doc/pcre2_jit_stack_create.3 +++ b/doc/pcre2_jit_stack_create.3 @@ -1,4 +1,4 @@ -.TH PCRE2_JIT_STACK_CREATE 3 "24 March 2017" "PCRE2 10.30" +.TH PCRE2_JIT_STACK_CREATE 3 "23 January 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_jit_stack_free.3 b/doc/pcre2_jit_stack_free.3 index 2131a79..b391526 100644 --- a/doc/pcre2_jit_stack_free.3 +++ b/doc/pcre2_jit_stack_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_JIT_STACK_FREE 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_JIT_STACK_FREE 3 "13 August 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_maketables.3 b/doc/pcre2_maketables.3 index 7dc8438..c565308 100644 --- a/doc/pcre2_maketables.3 +++ b/doc/pcre2_maketables.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MAKETABLES 3 "17 April 2017" "PCRE2 10.30" +.TH PCRE2_MAKETABLES 3 "28 July 2019" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_maketables_free.3 b/doc/pcre2_maketables_free.3 index 07986b9..9581e88 100644 --- a/doc/pcre2_maketables_free.3 +++ b/doc/pcre2_maketables_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MAKETABLES_FREE 3 "02 September 2019" "PCRE2 10.34" +.TH PCRE2_MAKETABLES_FREE 3 "03 September 2019" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_match.3 b/doc/pcre2_match.3 index efdd892..37855cb 100644 --- a/doc/pcre2_match.3 +++ b/doc/pcre2_match.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH 3 "27 January 2024" "PCRE2 10.43" +.TH PCRE2_MATCH 3 "27 January 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_match_context_copy.3 b/doc/pcre2_match_context_copy.3 index 26c33a6..a4eca53 100644 --- a/doc/pcre2_match_context_copy.3 +++ b/doc/pcre2_match_context_copy.3 @@ 
-1,4 +1,4 @@ -.TH PCRE2_MATCH_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_MATCH_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_match_context_create.3 b/doc/pcre2_match_context_create.3 index d4a2665..2522702 100644 --- a/doc/pcre2_match_context_create.3 +++ b/doc/pcre2_match_context_create.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_MATCH_CONTEXT_CREATE 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_match_context_free.3 b/doc/pcre2_match_context_free.3 index 7d19f98..96520b9 100644 --- a/doc/pcre2_match_context_free.3 +++ b/doc/pcre2_match_context_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_MATCH_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_match_data_create.3 b/doc/pcre2_match_data_create.3 index 439dea3..131a4a1 100644 --- a/doc/pcre2_match_data_create.3 +++ b/doc/pcre2_match_data_create.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH_DATA_CREATE 3 "28 August 2021" "PCRE2 10.38" +.TH PCRE2_MATCH_DATA_CREATE 3 "28 August 2021" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_match_data_create_from_pattern.3 b/doc/pcre2_match_data_create_from_pattern.3 index 829bf6c..72f4b9c 100644 --- a/doc/pcre2_match_data_create_from_pattern.3 +++ b/doc/pcre2_match_data_create_from_pattern.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "28 August 2021" "PCRE2 10.38" +.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "19 August 2022" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_match_data_free.3 b/doc/pcre2_match_data_free.3 
index 202035a..ecaf7c0 100644 --- a/doc/pcre2_match_data_free.3 +++ b/doc/pcre2_match_data_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH_DATA_FREE 3 "18 January 2023" "PCRE2 10.43" +.TH PCRE2_MATCH_DATA_FREE 3 "16 August 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_pattern_convert.3 b/doc/pcre2_pattern_convert.3 index b72acb7..b7da6ec 100644 --- a/doc/pcre2_pattern_convert.3 +++ b/doc/pcre2_pattern_convert.3 @@ -1,4 +1,4 @@ -.TH PCRE2_PATTERN_CONVERT 3 "11 July 2017" "PCRE2 10.30" +.TH PCRE2_PATTERN_CONVERT 3 "12 July 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_pattern_info.3 b/doc/pcre2_pattern_info.3 index edd8989..c1fa643 100644 --- a/doc/pcre2_pattern_info.3 +++ b/doc/pcre2_pattern_info.3 @@ -1,4 +1,4 @@ -.TH PCRE2_PATTERN_INFO 3 "14 February 2019" "PCRE2 10.33" +.TH PCRE2_PATTERN_INFO 3 "14 February 2019" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_serialize_decode.3 b/doc/pcre2_serialize_decode.3 index 611113f..4672aad 100644 --- a/doc/pcre2_serialize_decode.3 +++ b/doc/pcre2_serialize_decode.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SERIALIZE_DECODE 3 "27 June 2018" "PCRE2 10.32" +.TH PCRE2_SERIALIZE_DECODE 3 "22 April 2022" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_serialize_encode.3 b/doc/pcre2_serialize_encode.3 index d529360..9c9519a 100644 --- a/doc/pcre2_serialize_encode.3 +++ b/doc/pcre2_serialize_encode.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SERIALIZE_ENCODE 3 "27 June 2018" "PCRE2 10.32" +.TH PCRE2_SERIALIZE_ENCODE 3 "13 August 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_serialize_free.3 b/doc/pcre2_serialize_free.3 index 2c43824..5862e96 100644 --- a/doc/pcre2_serialize_free.3 +++ 
b/doc/pcre2_serialize_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SERIALIZE_FREE 3 "27 June 2018" "PCRE2 10.32" +.TH PCRE2_SERIALIZE_FREE 3 "13 August 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_serialize_get_number_of_codes.3 b/doc/pcre2_serialize_get_number_of_codes.3 index f5eea54..08c32a7 100644 --- a/doc/pcre2_serialize_get_number_of_codes.3 +++ b/doc/pcre2_serialize_get_number_of_codes.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SERIALIZE_GET_NUMBER_OF_CODES 3 "27 June 2018" "PCRE2 10.32" +.TH PCRE2_SERIALIZE_GET_NUMBER_OF_CODES 3 "13 August 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_bsr.3 b/doc/pcre2_set_bsr.3 index ecf2437..4873f1f 100644 --- a/doc/pcre2_set_bsr.3 +++ b/doc/pcre2_set_bsr.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_BSR 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_SET_BSR 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_callout.3 b/doc/pcre2_set_callout.3 index cb48e14..dfa8227 100644 --- a/doc/pcre2_set_callout.3 +++ b/doc/pcre2_set_callout.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_CALLOUT 3 "21 March 2017" "PCRE2 10.30" +.TH PCRE2_SET_CALLOUT 3 "25 March 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_character_tables.3 b/doc/pcre2_set_character_tables.3 index 1ca4134..e19cecc 100644 --- a/doc/pcre2_set_character_tables.3 +++ b/doc/pcre2_set_character_tables.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_CHARACTER_TABLES 3 "20 March 2020" "PCRE2 10.35" +.TH PCRE2_SET_CHARACTER_TABLES 3 "15 April 2020" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_compile_extra_options.3 b/doc/pcre2_set_compile_extra_options.3 index a1e07e9..585a610 100644 --- 
a/doc/pcre2_set_compile_extra_options.3 +++ b/doc/pcre2_set_compile_extra_options.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "03 February 2023" "PCRE2 10.43" +.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "14 October 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -43,6 +43,10 @@ options are: PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \er as \en PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines PCRE2_EXTRA_MATCH_WORD Pattern matches "words" + PCRE2_EXTRA_NEVER_CALLOUT Disallow callouts in pattern + PCRE2_EXTRA_NO_BS0 Disallow \e0 (but not \e00 or \e000) + PCRE2_EXTRA_PYTHON_OCTAL Use Python rules for octal + PCRE2_EXTRA_TURKISH_CASING Use Turkish I case folding .sp There is a complete description of the PCRE2 native API in the .\" HREF diff --git a/doc/pcre2_set_compile_recursion_guard.3 b/doc/pcre2_set_compile_recursion_guard.3 index 0575f94..6f93206 100644 --- a/doc/pcre2_set_compile_recursion_guard.3 +++ b/doc/pcre2_set_compile_recursion_guard.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_COMPILE_RECURSION_GUARD 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_SET_COMPILE_RECURSION_GUARD 3 "26 November 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_depth_limit.3 b/doc/pcre2_set_depth_limit.3 index 62bc7fe..8f3da0c 100644 --- a/doc/pcre2_set_depth_limit.3 +++ b/doc/pcre2_set_depth_limit.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_DEPTH_LIMIT 3 "25 March 2017" "PCRE2 10.30" +.TH PCRE2_SET_DEPTH_LIMIT 3 "25 March 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_glob_escape.3 b/doc/pcre2_set_glob_escape.3 index d5637af..ca02ca6 100644 --- a/doc/pcre2_set_glob_escape.3 +++ b/doc/pcre2_set_glob_escape.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_GLOB_ESCAPE 3 "11 July 2017" "PCRE2 10.30" +.TH PCRE2_SET_GLOB_ESCAPE 3 "12 July 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible 
regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_glob_separator.3 b/doc/pcre2_set_glob_separator.3 index 5d78c09..d10ca30 100644 --- a/doc/pcre2_set_glob_separator.3 +++ b/doc/pcre2_set_glob_separator.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_GLOB_SEPARATOR 3 "11 July 2017" "PCRE2 10.30" +.TH PCRE2_SET_GLOB_SEPARATOR 3 "17 June 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_heap_limit.3 b/doc/pcre2_set_heap_limit.3 index 7c155a2..31721cb 100644 --- a/doc/pcre2_set_heap_limit.3 +++ b/doc/pcre2_set_heap_limit.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30" +.TH PCRE2_SET_HEAP_LIMIT 3 "17 June 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_match_limit.3 b/doc/pcre2_set_match_limit.3 index 523e97f..bfe33b3 100644 --- a/doc/pcre2_set_match_limit.3 +++ b/doc/pcre2_set_match_limit.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_MATCH_LIMIT 3 "24 October 2014" "PCRE2 10.00" +.TH PCRE2_SET_MATCH_LIMIT 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_max_pattern_compiled_length.3 b/doc/pcre2_set_max_pattern_compiled_length.3 index 472a7bb..1457b12 100644 --- a/doc/pcre2_set_max_pattern_compiled_length.3 +++ b/doc/pcre2_set_max_pattern_compiled_length.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH 3 "24 April 2024" "PCRE2 10.44" +.TH PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH 3 "09 June 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -15,9 +15,9 @@ PCRE2 - Perl-compatible regular expressions (revised API) .rs .sp This function sets, in a compile context, the maximum size (in bytes) for the -memory needed to hold the compiled version of a pattern that is compiled with -this context. The result is always zero. 
If a pattern that is passed to -\fBpcre2_compile()\fP with this context needs more memory, an error is +memory needed to hold the compiled version of a pattern that is using this +context. The result is always zero. If a pattern that is passed to +\fBpcre2_compile()\fP referencing this context needs more memory, an error is generated. The default is the largest number that a PCRE2_SIZE variable can hold, which is effectively unlimited. .P diff --git a/doc/pcre2_set_max_pattern_length.3 b/doc/pcre2_set_max_pattern_length.3 index 7aa01c7..0873a71 100644 --- a/doc/pcre2_set_max_pattern_length.3 +++ b/doc/pcre2_set_max_pattern_length.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "05 October 2016" "PCRE2 10.23" +.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "05 October 2016" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_max_varlookbehind.3 b/doc/pcre2_set_max_varlookbehind.3 index 84639d9..a4757e9 100644 --- a/doc/pcre2_set_max_varlookbehind.3 +++ b/doc/pcre2_set_max_varlookbehind.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_NEWLINE 3 "09 August 2023" "PCRE2 10.43" +.TH PCRE2_SET_MAX_VARLOOKBEHIND 3 "11 August 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_newline.3 b/doc/pcre2_set_newline.3 index 0bccfc7..c33b4b6 100644 --- a/doc/pcre2_set_newline.3 +++ b/doc/pcre2_set_newline.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_NEWLINE 3 "26 May 2017" "PCRE2 10.30" +.TH PCRE2_SET_NEWLINE 3 "19 July 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_offset_limit.3 b/doc/pcre2_set_offset_limit.3 index 20fa104..48555cd 100644 --- a/doc/pcre2_set_offset_limit.3 +++ b/doc/pcre2_set_offset_limit.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_OFFSET_LIMIT 3 "22 September 2015" "PCRE2 10.21" +.TH PCRE2_SET_OFFSET_LIMIT 3 "22 September 2015" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible
regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_optimize.3 b/doc/pcre2_set_optimize.3 new file mode 100644 index 0000000..bd22dad --- /dev/null +++ b/doc/pcre2_set_optimize.3 @@ -0,0 +1,42 @@ +.TH PCRE2_SET_OPTIMIZE 3 "22 September 2024" "PCRE2 10.45-RC1" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_set_optimize(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIdirective\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function controls which performance optimizations will be applied +by \fBpcre2_compile()\fP. It can be called multiple times with the same compile +context; the effects are cumulative, with the effects of later calls taking +precedence over earlier ones. +.P +The result is zero for success, PCRE2_ERROR_NULL if \fIccontext\fP is NULL, +or PCRE2_ERROR_BADOPTION if \fIdirective\fP is unknown. The latter could be +useful to detect if a certain optimization is available. +.P +The possible values for the \fIdirective\fP parameter are: +.sp + PCRE2_OPTIMIZATION_FULL Enable all optimizations (default) + PCRE2_OPTIMIZATION_NONE Disable all optimizations + PCRE2_AUTO_POSSESS Enable auto-possessification + PCRE2_AUTO_POSSESS_OFF Disable auto-possessification + PCRE2_DOTSTAR_ANCHOR Enable implicit dotstar anchoring + PCRE2_DOTSTAR_ANCHOR_OFF Disable implicit dotstar anchoring + PCRE2_START_OPTIMIZE Enable start-up optimizations at match time + PCRE2_START_OPTIMIZE_OFF Disable start-up optimizations at match time +.sp +There is a complete description of the PCRE2 native API, including detailed +descriptions of the \fIdirective\fP parameter values in the +.\" HREF +\fBpcre2api\fP +.\" +page.
diff --git a/doc/pcre2_set_parens_nest_limit.3 b/doc/pcre2_set_parens_nest_limit.3 index 0367619..d200894 100644 --- a/doc/pcre2_set_parens_nest_limit.3 +++ b/doc/pcre2_set_parens_nest_limit.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_PARENS_NEST_LIMIT 3 "22 October 2014" "PCRE2 10.00" +.TH PCRE2_SET_PARENS_NEST_LIMIT 3 "25 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_recursion_limit.3 b/doc/pcre2_set_recursion_limit.3 index 26f4257..bb55cce 100644 --- a/doc/pcre2_set_recursion_limit.3 +++ b/doc/pcre2_set_recursion_limit.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_RECURSION_LIMIT 3 "25 March 2017" "PCRE2 10.30" +.TH PCRE2_SET_RECURSION_LIMIT 3 "19 July 2017" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_recursion_memory_management.3 b/doc/pcre2_set_recursion_memory_management.3 index 743c288..78e4fb5 100644 --- a/doc/pcre2_set_recursion_memory_management.3 +++ b/doc/pcre2_set_recursion_memory_management.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_RECURSION_MEMORY_MANAGEMENT 3 "25 March 2017" "PCRE2 10.30" +.TH PCRE2_SET_RECURSION_MEMORY_MANAGEMENT 3 "23 January 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_set_substitute_callout.3 b/doc/pcre2_set_substitute_callout.3 index cdd7ac6..ebdac7c 100644 --- a/doc/pcre2_set_substitute_callout.3 +++ b/doc/pcre2_set_substitute_callout.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SET_SUBSTITUTE_CALLOUT 3 "12 November 2018" "PCRE2 10.33" +.TH PCRE2_SET_SUBSTITUTE_CALLOUT 3 "04 October 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -8,7 +8,7 @@ PCRE2 - Perl-compatible regular expressions (revised API) .PP .nf .B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, -.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *)," +.B " int 
(*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," .B " void *\fIcallout_data\fP);" .fi . diff --git a/doc/pcre2_set_substitute_case_callout.3 b/doc/pcre2_set_substitute_case_callout.3 new file mode 100644 index 0000000..bf1dd7c --- /dev/null +++ b/doc/pcre2_set_substitute_case_callout.3 @@ -0,0 +1,33 @@ +.TH PCRE2_SET_SUBSTITUTE_CASE_CALLOUT 3 "26 December 2024" "PCRE2 10.45-RC1" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the substitute case callout fields in a match context (the +first argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page.
diff --git a/doc/pcre2_substitute.3 b/doc/pcre2_substitute.3 index 7ee4b6a..bea57c9 100644 --- a/doc/pcre2_substitute.3 +++ b/doc/pcre2_substitute.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTITUTE 3 "22 January 2020" "PCRE2 10.35" +.TH PCRE2_SUBSTITUTE 3 "27 November 2021" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_copy_byname.3 b/doc/pcre2_substring_copy_byname.3 index d2af63b..d91f86a 100644 --- a/doc/pcre2_substring_copy_byname.3 +++ b/doc/pcre2_substring_copy_byname.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_COPY_BYNAME 3 "21 October 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_COPY_BYNAME 3 "19 December 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_copy_bynumber.3 b/doc/pcre2_substring_copy_bynumber.3 index 4cee2b4..7eb3002 100644 --- a/doc/pcre2_substring_copy_bynumber.3 +++ b/doc/pcre2_substring_copy_bynumber.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_COPY_BYNUMBER 3 "13 December 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_COPY_BYNUMBER 3 "13 December 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_free.3 b/doc/pcre2_substring_free.3 index 6d0fd58..eae854b 100644 --- a/doc/pcre2_substring_free.3 +++ b/doc/pcre2_substring_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_FREE 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2_SUBSTRING_FREE 3 "28 June 2018" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_get_byname.3 b/doc/pcre2_substring_get_byname.3 index 6c3f7d5..7466d71 100644 --- a/doc/pcre2_substring_get_byname.3 +++ b/doc/pcre2_substring_get_byname.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_GET_BYNAME 3 "21 October 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_GET_BYNAME 3 "19 December 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular 
expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_get_bynumber.3 b/doc/pcre2_substring_get_bynumber.3 index 51b6a04..51a6401 100644 --- a/doc/pcre2_substring_get_bynumber.3 +++ b/doc/pcre2_substring_get_bynumber.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_GET_BYNUMBER 3 "13 December 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_GET_BYNUMBER 3 "13 December 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_length_byname.3 b/doc/pcre2_substring_length_byname.3 index 84cdc6a..a79a0b1 100644 --- a/doc/pcre2_substring_length_byname.3 +++ b/doc/pcre2_substring_length_byname.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_LENGTH_BYNAME 3 "21 October 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_LENGTH_BYNAME 3 "21 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_length_bynumber.3 b/doc/pcre2_substring_length_bynumber.3 index 12778d6..7a3b199 100644 --- a/doc/pcre2_substring_length_bynumber.3 +++ b/doc/pcre2_substring_length_bynumber.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_LENGTH_BYNUMBER 3 "22 December 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_LENGTH_BYNUMBER 3 "22 December 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_list_free.3 b/doc/pcre2_substring_list_free.3 index 2c6fb02..c05972c 100644 --- a/doc/pcre2_substring_list_free.3 +++ b/doc/pcre2_substring_list_free.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_LIST_FREE 3 "02 December 2023" "PCRE2 10.43" +.TH PCRE2_SUBSTRING_LIST_FREE 3 "02 December 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_list_get.3 b/doc/pcre2_substring_list_get.3 index bdc400e..deef7b7 100644 --- a/doc/pcre2_substring_list_get.3 +++ b/doc/pcre2_substring_list_get.3 @@ -1,4 +1,4 @@ -.TH 
PCRE2_SUBSTRING_LIST_GET 3 "21 October 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_LIST_GET 3 "21 October 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_nametable_scan.3 b/doc/pcre2_substring_nametable_scan.3 index 9ab58cd..50d6540 100644 --- a/doc/pcre2_substring_nametable_scan.3 +++ b/doc/pcre2_substring_nametable_scan.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_NAMETABLE_SCAN 3 "03 February 2019" "PCRE2 10.33" +.TH PCRE2_SUBSTRING_NAMETABLE_SCAN 3 "06 February 2019" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2_substring_number_from_name.3 b/doc/pcre2_substring_number_from_name.3 index b077b1d..5c3a96e 100644 --- a/doc/pcre2_substring_number_from_name.3 +++ b/doc/pcre2_substring_number_from_name.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTRING_NUMBER_FROM_NAME 3 "21 October 2014" "PCRE2 10.00" +.TH PCRE2_SUBSTRING_NUMBER_FROM_NAME 3 "03 November 2014" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 6028d62..0e2a3b6 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "24 April 2024" "PCRE2 10.44" +.TH PCRE2API 3 "26 December 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -115,6 +115,9 @@ document for an overview of all the PCRE2 documentation. .sp .B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, .B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);" +.sp +.B int pcre2_set_optimize(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIdirective\fP);" .fi . . @@ -138,6 +141,12 @@ document for an overview of all the PCRE2 documentation. 
.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," .B " void *\fIcallout_data\fP);" .sp +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.sp .B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, .B " PCRE2_SIZE \fIvalue\fP);" .sp @@ -738,6 +747,7 @@ following compile-time parameters: The compile time nested parentheses limit The maximum length of the pattern string The extra options bits (none set by default) + Which performance optimizations the compiler should apply .sp A compile context is also required if you are using custom memory management. If none of these apply, just pass NULL as the context argument of @@ -881,6 +891,105 @@ The first argument to the callout function gives the current depth of nesting, and the second is user data that is set up by the last argument of \fBpcre2_set_compile_recursion_guard()\fP. The callout function should return zero if all is well, or non-zero to force an error. +.sp +.nf +.B int pcre2_set_optimize(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIdirective\fP);" +.fi +.sp +PCRE2 can apply various performance optimizations during compilation, in order +to make matching faster. For example, the compiler might convert some regex +constructs into an equivalent construct which \fBpcre2_match()\fP can execute +faster. By default, all available optimizations are enabled. However, in rare +cases, one might wish to disable specific optimizations. For example, if it is +known that some optimizations cannot benefit a certain regex, it might be +desirable to disable them, in order to speed up compilation. +.P +The permitted values of \fIdirective\fP are as follows: +.sp + PCRE2_OPTIMIZATION_FULL +.sp +Enable all optional performance optimizations. This is the default value. 
+.sp + PCRE2_OPTIMIZATION_NONE +.sp +Disable all optional performance optimizations. +.sp + PCRE2_AUTO_POSSESS + PCRE2_AUTO_POSSESS_OFF +.sp +Enable/disable "auto-possessification" of variable quantifiers such as * and +. +This optimization, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +disable this optimization if you want the matching functions to do a full, +unoptimized search and run all the callouts. +.sp + PCRE2_DOTSTAR_ANCHOR + PCRE2_DOTSTAR_ANCHOR_OFF +.sp +Enable/disable an optimization that is applied when .* is the first significant +item in a top-level branch of a pattern, and all the other branches also start +with .* or with \eA or \eG or ^. Such a pattern is automatically anchored if +PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set for any +^ items. Otherwise, the fact that any match must start either at the start of +the subject or following a newline is remembered. Like other optimizations, +this can cause callouts to be skipped. +.P +Dotstar anchor optimization is automatically disabled for .* if it is inside an +atomic group or a capture group that is the subject of a backreference, or if +the pattern contains (*PRUNE) or (*SKIP). +.sp + PCRE2_START_OPTIMIZE + PCRE2_START_OPTIMIZE_OFF +.sp +Enable/disable optimizations which cause matching functions to scan the subject +string for specific code unit values before attempting a match. For example, if +it is known that an unanchored match must start with a specific value, the +matching code searches the subject for that value, and fails immediately if it +cannot find it, without actually running the main matching function. This means +that a special item such as (*COMMIT) at the start of a pattern is not +considered until after a suitable starting point for the match has been found. 
+Also, when callouts or (*MARK) items are in use, these "start-up" optimizations +can cause them to be skipped if the pattern is never actually used. The start-up +optimizations are in effect a pre-scan of the subject that takes place before +the pattern is run. +.P +Disabling start-up optimizations ensures that in cases where the result is "no +match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) are +considered at every possible starting position in the subject string. +.P +Disabling start-up optimizations may change the outcome of a matching operation. +Consider the pattern +.sp + (*COMMIT)ABC +.sp +When this is compiled, PCRE2 records the fact that a match must start with the +character "A". Suppose the subject string is "DEFABC". The start-up +optimization scans along the subject, finds "A" and runs the first match +attempt from there. The (*COMMIT) item means that the pattern must match the +current starting position, which in this case, it does. However, if the same +match is run without start-up optimizations, the initial scan along the subject +string does not happen. The first match attempt is run starting from "D" and +when this fails, (*COMMIT) prevents any further matches being tried, so the +overall result is "no match". +.P +Another start-up optimization makes use of a minimum length for a matching +subject, which is recorded when possible. Consider the pattern +.sp + (*MARK:1)B(*MARK:2)(X|Y) +.sp +The minimum length for a match is two characters. If the subject is "XXBB", the +"starting character" optimization skips "XX", then tries to match "BB", which +is long enough. In the process, (*MARK:2) is encountered and remembered. When +the match attempt fails, the next "B" is found, but there is only one character +left, so there are no more attempts, and "no match" is returned with the "last +mark seen" set to "2". 
Without start-up optimizations, however, matches are +tried at every possible starting position, including at the end of the subject, +where (*MARK:1) is encountered, but there is no "B", so the "last mark seen" +that is returned is "1". In this case, the optimizations do not affect the +overall match result, which is still "no match", but they do affect the +auxiliary information that is returned. . . .\" HTML @@ -943,6 +1052,22 @@ below. .\" .sp .nf +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +This sets up a callout function for PCRE2 to call when performing case +transformations inside \fBpcre2_substitute()\fP. Details are given in the +section entitled "Creating a new string with substitutions" +.\" HTML +.\" +below. +.\" +.sp +.nf .B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, .B " PCRE2_SIZE \fIvalue\fP);" .fi @@ -1153,7 +1278,10 @@ for the amount of heap memory used by \fBpcre2_match()\fP or The output is a uint32_t integer that is set to one if support for just-in-time compiling is included in the library; otherwise it is set to zero. Note that having the support in the library does not guarantee that JIT will be used for -any given match. See the +any given match, and neither does it guarantee that JIT will actually be able +to function, because it may not be able to allocate executable memory in some +environments. There is a special call to \fBpcre2_jit_compile()\fP that can be +used to check this. See the .\" HREF \fBpcre2jit\fP .\" @@ -1369,7 +1497,7 @@ error code and an offset (number of code units) within the pattern, respectively, when \fBpcre2_compile()\fP returns NULL because a compilation error has occurred. 
.P -There are nearly 100 positive error codes that \fBpcre2_compile()\fP may return +There are over 100 positive error codes that \fBpcre2_compile()\fP may return if it finds an error in the pattern. There are also some negative error codes that are used for invalid UTF strings when validity checking is in force. These are the same as given by \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and @@ -1477,6 +1605,18 @@ after any internal newline. However, it does not match after a newline at the end of the subject, for compatibility with Perl. If you want a multiline circumflex also to match after a terminating newline, you must set PCRE2_ALT_CIRCUMFLEX. +.sp + PCRE2_ALT_EXTENDED_CLASS +.sp +Alters the parsing of character classes to follow the extended syntax +described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no impact +on the behaviour of the Perl-specific "(?[...])" syntax for extended classes, +but instead enables the alternative syntax of extended class behaviour inside +ordinary "[...]" character classes. See the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation for details of the character classes supported. .sp PCRE2_ALT_VERBNAMES .sp @@ -1509,15 +1649,27 @@ letters in the subject. It is equivalent to Perl's /i option, and it can be changed within a pattern by a (?i) option setting. If either PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all characters with more than one other case, and for all characters whose code points are greater than -U+007F. Note that there are two ASCII characters, K and S, that, in addition to +U+007F. +.P +Note that there are two ASCII characters, K and S, that, in addition to their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin sign) and U+017F (long S) respectively. If you do not want this case equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT. 
.P +One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. +.P For lower valued characters with only one other case, a lookup table is used for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used for all code points less than 256, and higher code points (available only in 16-bit or 32-bit mode) are treated as not having another case. +.P +From release 10.45 PCRE2_CASELESS also affects what some of the letter-related +Unicode property escapes (\ep and \eP) match. The properties Lu (upper case +letter), Ll (lower case letter), and Lt (title case letter) are all treated as +LC (cased letter) when PCRE2_CASELESS is set. .sp PCRE2_DOLLAR_ENDONLY .sp @@ -1717,7 +1869,7 @@ This option locks out the use of Unicode properties for handling \eB, \eb, \eD, for the PCRE2_UCP option below. In particular, it prevents the creator of the pattern from enabling this facility by starting the pattern with (*UCP). This option may be useful in applications that process patterns from external -sources. The option combination PCRE_UCP and PCRE_NEVER_UCP causes an error. +sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. .sp PCRE2_NEVER_UTF .sp @@ -1740,81 +1892,54 @@ though the reference can be by name or by number. .sp PCRE2_NO_AUTO_POSSESS .sp -If this option is set, it disables "auto-possessification", which is an -optimization that, for example, turns a+b into a++b in order to avoid +If this (deprecated) option is set, it disables "auto-possessification", which +is an optimization that, for example, turns a+b into a++b in order to avoid backtracks into a+ that can never be successful. However, if callouts are in use, auto-possessification means that some callouts are never taken. 
You can set this option if you want the matching functions to do a full unoptimized search and run all the callouts, but it is mainly provided for testing purposes. +.P +If a compile context is available, it is recommended to use +\fBpcre2_set_optimize()\fP with the \fIdirective\fP PCRE2_AUTO_POSSESS_OFF rather +than the compile option PCRE2_NO_AUTO_POSSESS. Note that PCRE2_NO_AUTO_POSSESS +takes precedence over the \fBpcre2_set_optimize()\fP optimization directives +PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF. .sp PCRE2_NO_DOTSTAR_ANCHOR .sp -If this option is set, it disables an optimization that is applied when .* is -the first significant item in a top-level branch of a pattern, and all the -other branches also start with .* or with \eA or \eG or ^. The optimization is -automatically disabled for .* if it is inside an atomic group or a capture -group that is the subject of a backreference, or if the pattern contains -(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is -automatically anchored if PCRE2_DOTALL is set for all the .* items and -PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match -must start either at the start of the subject or following a newline is +If this (deprecated) option is set, it disables an optimization that is applied +when .* is the first significant item in a top-level branch of a pattern, and +all the other branches also start with .* or with \eA or \eG or ^. The +optimization is automatically disabled for .* if it is inside an atomic group +or a capture group that is the subject of a backreference, or if the pattern +contains (*PRUNE) or (*SKIP). When the optimization is not disabled, such a +pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items +and PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any +match must start either at the start of the subject or following a newline is remembered. 
Like other optimizations, this can cause callouts to be skipped. +(If a compile context is available, it is recommended to use +\fBpcre2_set_optimize()\fP with the \fIdirective\fP PCRE2_DOTSTAR_ANCHOR_OFF +instead.) .sp PCRE2_NO_START_OPTIMIZE .sp This is an option whose main effect is at matching time. It does not change what \fBpcre2_compile()\fP generates, but it does affect the output of the JIT -compiler. +compiler. Setting this option is equivalent to calling \fBpcre2_set_optimize()\fP +with the \fIdirective\fP parameter set to PCRE2_START_OPTIMIZE_OFF. .P There are a number of optimizations that may occur at the start of a match, in order to speed up the process. For example, if it is known that an unanchored match must start with a specific code unit value, the matching code searches the subject for that value, and fails immediately if it cannot find it, without -actually running the main matching function. This means that a special item -such as (*COMMIT) at the start of a pattern is not considered until after a -suitable starting point for the match has been found. Also, when callouts or -(*MARK) items are in use, these "start-up" optimizations can cause them to be -skipped if the pattern is never actually used. The start-up optimizations are +actually running the main matching function. The start-up optimizations are in effect a pre-scan of the subject that takes place before the pattern is run. .P -The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations, -possibly causing performance to suffer, but ensuring that in cases where the -result is "no match", the callouts do occur, and that items such as (*COMMIT) -and (*MARK) are considered at every possible starting position in the subject -string. -.P -Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation. -Consider the pattern -.sp - (*COMMIT)ABC -.sp -When this is compiled, PCRE2 records the fact that a match must start with the -character "A". 
Suppose the subject string is "DEFABC". The start-up -optimization scans along the subject, finds "A" and runs the first match -attempt from there. The (*COMMIT) item means that the pattern must match the -current starting position, which in this case, it does. However, if the same -match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the -subject string does not happen. The first match attempt is run starting from -"D" and when this fails, (*COMMIT) prevents any further matches being tried, so -the overall result is "no match". -.P -As another start-up optimization makes use of a minimum length for a matching -subject, which is recorded when possible. Consider the pattern -.sp - (*MARK:1)B(*MARK:2)(X|Y) -.sp -The minimum length for a match is two characters. If the subject is "XXBB", the -"starting character" optimization skips "XX", then tries to match "BB", which -is long enough. In the process, (*MARK:2) is encountered and remembered. When -the match attempt fails, the next "B" is found, but there is only one character -left, so there are no more attempts, and "no match" is returned with the "last -mark seen" set to "2". If NO_START_OPTIMIZE is set, however, matches are tried -at every possible starting position, including at the end of the subject, where -(*MARK:1) is encountered, but there is no "B", so the "last mark seen" that is -returned is "1". In this case, the optimizations do not affect the overall -match result, which is still "no match", but they do affect the auxiliary -information that is returned. +Disabling the start-up optimizations may cause performance to suffer. However, +this may be desirable for patterns which contain callouts or items such as +(*COMMIT) and (*MARK). See the above description of PCRE2_START_OPTIMIZE_OFF +for further details. 
.sp PCRE2_NO_UTF_CHECK .sp @@ -1884,9 +2009,17 @@ The second effect of PCRE2_UCP is to force the use of Unicode properties for upper/lower casing operations, even when PCRE2_UTF is not set. This makes it possible to process strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has been compiled with Unicode support (which is the default). -The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless +.P +The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless matching such that ASCII characters match only ASCII characters and non-ASCII -characters match only non-ASCII characters. +characters match only non-ASCII characters. The PCRE2_EXTRA_TURKISH_CASING option +(see above) alters the matching of the 'i' characters to follow their behaviour +in Turkish and Azeri languages. For further details on +PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EXTRA_TURKISH_CASING, see the +.\" HREF +\fBpcre2unicode\fP +.\" +page. .sp PCRE2_UNGREEDY .sp @@ -2026,7 +2159,8 @@ characters. The ASCII letter S is case-equivalent to U+017f (long S) and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a caseless match, both characters must either be ASCII or non-ASCII. The option -can be changed with a pattern by the (?r) option setting. +can be changed within a pattern by the (*CASELESS_RESTRICT) or (?r) option +settings. .sp PCRE2_EXTRA_ESCAPED_CR_IS_LF .sp @@ -2053,6 +2187,36 @@ and the end. This is achieved by automatically inserting the code for "\eb(?:" at the start of the compiled pattern and ")\eb" at the end. The option may be used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is also set. +.sp + PCRE2_EXTRA_NO_BS0 +.sp +If this option is set (note that its final character is the digit 0) it locks +out the use of the sequence \e0 unless at least one more octal digit follows. 
+.sp + PCRE2_EXTRA_PYTHON_OCTAL +.sp +If this option is set, PCRE2 follows Python's rules for interpreting octal +escape sequences. The rules for handling sequences such as \e14, which could +be an octal number or a back reference are different. Details are given in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.sp + PCRE2_EXTRA_NEVER_CALLOUT +.sp +If this option is set, PCRE2 treats callouts in the pattern as a syntax error, +returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if the application +knows that a callout will not be provided to \fBpcre2_match()\fP, so that +callouts in the pattern are not silently ignored. +.sp + PCRE2_EXTRA_TURKISH_CASING +.sp +This option alters case-equivalence of the 'i' letters to follow the +alphabet used by Turkish and Azeri languages. The option can be changed within +a pattern by the (*TURKISH_CASING) start-of-pattern setting. Either the UTF or +UCP options must be set. In the 8-bit library, UTF must be set. This option +cannot be combined with PCRE2_EXTRA_CASELESS_RESTRICT. . . .\" HTML @@ -2264,6 +2428,7 @@ following are true: PCRE2_DOTALL is in force for .* Neither (*PRUNE) nor (*SKIP) appears in the pattern PCRE2_NO_DOTSTAR_ANCHOR is not set + Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF .sp For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the options returned for PCRE2_INFO_ALLOPTIONS. @@ -3654,9 +3819,10 @@ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If this option is set, however, \fBpcre2_substitute()\fP continues to go through the motions of matching and substituting (without, of course, writing anything) -in order to compute the size of buffer that is needed. This value is passed -back via the \fIoutlengthptr\fP variable, with the result of the function still -being PCRE2_ERROR_NOMEMORY. 
+in order to compute the size of buffer that is needed, which will include the +extra space for the terminating NUL. This value is passed back via the +\fIoutlengthptr\fP variable, with the result of the function still being +PCRE2_ERROR_NOMEMORY. .P Passing a buffer size of zero is a permitted way of finding out how much memory is needed for given substitution. However, this does mean that the entire @@ -3672,18 +3838,25 @@ If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted in any way. By default, however, a dollar character is an escape character that can specify the insertion of characters from capture groups and names from (*MARK) or other control verbs in the pattern. Dollar is the only escape -character (backslash is treated as literal). The following forms are always +character (backslash is treated as literal). The following forms are recognized: .sp $$ insert a dollar character - $<n> or ${<n>} insert the contents of group <n> + $n or ${n} insert the contents of group \fIn\fP + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string $*MARK or ${*MARK} insert a control verb name .sp -Either a group number or a group name can be given for <n>. Curly brackets are -required only if the following character would be interpreted as part of the -number or name. The number may be zero to include the entire matched string. -For example, if the pattern a(b)c is matched with "=abc=" and the replacement -string "+$1$0$1+", the result is "=+babcb+=". +Either a group number or a group name can be given for \fIn\fP, for example $2 or +$NAME. Curly brackets are required only if the following character would be +interpreted as part of the number or name. The number may be zero to include +the entire matched string. For example, if the pattern a(b)c is matched with +"=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=".
+.P +The JavaScript form $<name>, where the angle brackets are part of the syntax, +is also recognized for group names, but not for group numbers or *MARK. .P $*MARK inserts the name from the last encountered backtracking control verb on the matching path that has a name. (*MARK) must always include a name, but the @@ -3732,46 +3905,69 @@ not influence the extended substitution syntax described below. PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the replacement string. Without this option, only the dollar character is special, and only the group insertion forms listed above are valid. When -PCRE2_SUBSTITUTE_EXTENDED is set, two things change: +PCRE2_SUBSTITUTE_EXTENDED is set, several things change: .P Firstly, backslash in a replacement string is interpreted as an escape -character. The usual forms such as \en or \ex{ddd} can be used to specify -particular character codes, and backslash followed by any non-alphanumeric -character quotes that character. Extended quoting can be coded using \eQ...\eE, -exactly as in pattern strings. +character. The usual forms such as \ex{ddd} can be used to specify particular +character codes, and backslash followed by any non-alphanumeric character +quotes that character. Extended quoting can be coded using \eQ...\eE, exactly +as in pattern strings. The escapes \eb and \ev are interpreted as the +characters backspace and vertical tab, respectively. +.P +The interpretation of backslash followed by one or more digits is the same as +in a pattern, which in Perl has some ambiguities. Details are given in the +.\" HREF +\fBpcre2pattern\fP +.\" +page. +.P +The Python form \eg<n>, where the angle brackets are part of the syntax and \fIn\fP +is either a group name or number, is recognized as an alternative way of +inserting the contents of a group, for example \eg<3>. .P There are also four escape sequences for forcing the case of inserted letters.
-The insertion mechanism has three states: no case forcing, force upper case, -and force lower case. The escape sequences change the current state: \eU and -\eL change to upper or lower case forcing, respectively, and \eE (when not -terminating a \eQ quoted sequence) reverts to no case forcing. The sequences -\eu and \el force the next character (if it is a letter) to upper or lower -case, respectively, and then the state automatically reverts to no case -forcing. Case forcing applies to all inserted characters, including those from -capture groups and letters within \eQ...\eE quoted sequences. If either -PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode +Case forcing applies to all inserted characters, including those from capture +groups and letters within \eQ...\eE quoted sequences. The insertion mechanism +has three states: no case forcing, force upper case, and force lower case. The +escape sequences change the current state: \eU and \eL change to upper or lower +case forcing, respectively, and \eE (when not terminating a \eQ quoted +sequence) reverts to no case forcing. The sequences \eu and \el force the next +character (if it is a letter) to upper or lower case, respectively, and then +the state automatically reverts to no case forcing. +.P +However, if \eu is immediately followed by \eL or \el is immediately followed +by \eU, the next character's case is forced by the first escape sequence, and +subsequent characters by the second. This provides a "title casing" facility +that can be applied to group captures. For example, if group 1 has captured +"heLLo", the replacement string "\eu\eL$1" becomes "Hello". +.P +If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater -than 127. +than 127. However, only simple case folding, as determined by the Unicode file +\fBCaseFolding.txt\fP is supported. 
PCRE2 does not support language-specific +special casing rules such as using different lower case Greek sigmas in the +middle and ends of words (as defined in the Unicode file +\fBSpecialCasing.txt\fP). .P Note that case forcing sequences such as \eU...\eE do not nest. For example, the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do not apply to replacement strings. .P -The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more flexibility to capture group substitution. The syntax is similar to that used by Bash: .sp - ${<n>:-<string>} - ${<n>:+<string1>:<string2>} + ${n:-string} + ${n:+string1:string2} .sp -As before, <n> may be a group number or a name. The first form specifies a -default value. If group <n> is set, its value is inserted; if not, <string> is -expanded and the result inserted. The second form specifies strings that are -expanded and inserted when group <n> is set or unset, respectively. The first -form is just a convenient shorthand for +As in the simple case, \fIn\fP may be a group number or a name. The first form +specifies a default value. If group \fIn\fP is set, its value is inserted; if +not, the string is expanded and the result inserted. The second form specifies +strings that are expanded and inserted when group \fIn\fP is set or unset, +respectively. The first form is just a convenient shorthand for .sp - ${<n>:+${<n>}:<string>} + ${n:+${n}:string} .sp Backslash can be used to escape colons and closing curly brackets in the replacement strings. A change of the case forcing state within a replacement @@ -3846,9 +4042,17 @@ above). The \fBpcre2_set_substitution_callout()\fP function can be used to specify a callout function for \fBpcre2_substitute()\fP. This information is passed in a match context. The callout function is called after each substitution has -been processed, but it can cause the replacement not to happen.
The callout -function is not called for simulated substitutions that happen as a result of -the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. +been processed, but it can cause the replacement not to happen. +.P +The callout function is not called for simulated substitutions that happen as a +result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In this mode, when +substitution processing exceeds the buffer space provided by the caller, +processing continues by counting code units. The simulation is unable to +populate the callout block, and so the simulation is pessimistic about the +required buffer size. Whichever is larger of accepted or rejected substitution +is reported as the required size. Therefore, the returned buffer length may be +an overestimate (without a substitution callout, it is normally an exact +measurement). .P The first argument of the callout function is a pointer to a substitute callout block structure, which contains the following fields, not necessarily in this @@ -3892,6 +4096,97 @@ output and the call to \fBpcre2_substitute()\fP exits, returning the number of matches so far. . . +.SS "Substitution case callouts" +.rs +.sp +.nf +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +The \fBpcre2_set_substitute_case_callout()\fP function can be used to specify +a callout function for \fBpcre2_substitute()\fP to use when performing case +transformations. This does not affect any case insensitivity behaviour when +performing a match, but only the user-visible transformations performed when +processing a substitution such as: +.sp + pcre2_substitute(..., "\e\eU$1", ...) +.P +The default case transformations applied by PCRE2 are reasonably complete, and, +in UTF or UCP mode, perform the simple locale-invariant case transformations as +specified by Unicode.
This is suitable for the internal (invisible) +case-equivalence procedures used during pattern matching, but an application +may wish to use more sophisticated locale-aware processing for the user-visible +substitution transformations. +.P +One example implementation of the \fIcallout_function\fP using the ICU +library would be: +.sp +.nf + PCRE2_SIZE + icu_case_callout( + PCRE2_SPTR input, PCRE2_SIZE input_len, + PCRE2_UCHAR *output, PCRE2_SIZE output_cap, + int to_case, void *data_ptr) + { + UErrorCode err = U_ZERO_ERROR; + int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER + ? u_strToLower(output, output_cap, input, input_len, NULL, &err) + : to_case == PCRE2_SUBSTITUTE_CASE_UPPER + ? u_strToUpper(output, output_cap, input, input_len, NULL, &err) + : u_strToTitle(output, output_cap, input, input_len, &first_char_only, + NULL, &err); + if (U_FAILURE(err)) return (~(PCRE2_SIZE)0); + return r; + } +.fi +.P +The first and second arguments of the case callout function are the Unicode +string to transform. +.P +The third and fourth arguments are the output buffer and its capacity. +.P +The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER, +PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST. +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase +and the rest to Unicode lowercase (note that titlecasing sometimes uses Unicode +properties to titlecase each word in a string; but PCRE2 is requesting that only +the single leading character is to be titlecased). +.P +The sixth argument is the \fIcallout_data\fP supplied to +\fBpcre2_set_substitute_case_callout()\fP. 
+.P +The resulting string in the destination buffer may be larger or smaller than the +input, if the casing rules merge or split characters. The return value is the +length required for the output string. If a buffer of sufficient size was +provided to the callout, then the result must be written to the buffer and the +number of code units returned. If the result does not fit in the provided +buffer, then the required capacity must be returned and PCRE2 will not make use +of the output buffer. PCRE2 provides input and output buffers which overlap, so +the callout must support this by suitable internal buffering. +.P +Alternatively, if the callout wishes to indicate an error, then it may return +(~(PCRE2_SIZE)0). In this case pcre2_substitute() will immediately fail with +error PCRE2_ERROR_REPLACECASE. +.P +When a case callout is combined with the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH +option, there are situations when pcre2_substitute() will return an +underestimate of the required buffer size. If you call pcre2_substitute() once +with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the input buffer is too small for +the replacement string to be constructed, then instead of calling the case +callout, pcre2_substitute() will make an estimate of the required buffer size. +The second call should also pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that +second call is not guaranteed to succeed either, if the case callout requires +more buffer space than expected. The caller must make repeated attempts in a +loop. +. +. .SH "DUPLICATE CAPTURE GROUP NAMES" .rs .sp @@ -4182,6 +4477,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 24 April 2024 +Last updated: 26 December 2024 Copyright (c) 1997-2024 University of Cambridge. 
.fi diff --git a/doc/pcre2build.3 b/doc/pcre2build.3 index 1df4ebd..cf55e75 100644 --- a/doc/pcre2build.3 +++ b/doc/pcre2build.3 @@ -1,4 +1,4 @@ -.TH PCRE2BUILD 3 "15 April 2024" "PCRE2 10.44" +.TH PCRE2BUILD 3 "16 April 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) . @@ -660,6 +660,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 15 April 2024 +Last updated: 16 April 2024 Copyright (c) 1997-2024 University of Cambridge. .fi diff --git a/doc/pcre2callout.3 b/doc/pcre2callout.3 index 86a1c54..be727d8 100644 --- a/doc/pcre2callout.3 +++ b/doc/pcre2callout.3 @@ -1,4 +1,4 @@ -.TH PCRE2CALLOUT 3 "19 January 2024" "PCRE2 10.43" +.TH PCRE2CALLOUT 3 "19 January 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS diff --git a/doc/pcre2compat.3 b/doc/pcre2compat.3 index 8313e03..db1d564 100644 --- a/doc/pcre2compat.3 +++ b/doc/pcre2compat.3 @@ -1,4 +1,4 @@ -.TH PCRE2COMPAT 3 "30 November 2023" "PCRE2 10.43" +.TH PCRE2COMPAT 3 "02 October 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "DIFFERENCES BETWEEN PCRE2 AND PERL" @@ -54,7 +54,7 @@ interprets them. 7. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is built with Unicode support (the default). The properties that can be tested with \ep and \eP are limited to the general category properties such as Lu and -Nd, the derived properties Any and LC (synonym L&), script names such as Greek +Nd, the derived properties Any and Lc (synonym L&), script names such as Greek or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See the @@ -85,7 +85,12 @@ following examples: \eQ\e\eE \e \e\eE .sp The \eQ...\eE sequence is recognized both inside and outside character classes -by both PCRE2 and Perl. +by both PCRE2 and Perl. 
Another difference from Perl is that any appearance of +\eQ or \eE inside what might otherwise be a quantifier causes PCRE2 not to +recognize the sequence as a quantifier. Perl recognizes a quantifier if +(redundantly) either of the numbers is inside \eQ...\eE, but not if the +separating comma is. When not recognized as a quantifier a sequence such as +{\eQ1\eE,2} is treated as the literal string "{1,2}". .P 9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) constructions. However, PCRE2 does have a "callout" feature, which allows an @@ -105,7 +110,9 @@ confined to that group; it does not extend to the surrounding pattern. This is not always the case in Perl. In particular, if (*THEN) is present in a group that is called as a subroutine, its action is limited to that group, even if the group does not contain any | characters. Note that such groups are -processed as anchored at the point where they are tested. +processed as anchored at the point where they are tested. PCRE2 also confines +all control verbs within atomic assertions, again including (*THEN) in +assertions with only one branch. .P 12. If a pattern contains more than one backtracking control verb, the first one that is backtracked onto acts. For example, in the pattern @@ -138,11 +145,11 @@ Perl behaves differently. warning features, so it gives an error in these cases because they are almost certainly user mistakes. .P -17. In PCRE2, the upper/lower case character properties Lu and Ll are not -affected when case-independent matching is specified. For example, \ep{Lu} -always matches an upper case letter. I think Perl has changed in this respect; -in the release at the time of writing (5.38), \ep{Lu} and \ep{Ll} match all -letters, regardless of case, when case independence is specified. +17. In PCRE2, until release 10.45, the upper/lower case character properties Lu +and Ll were not affected when case-independent matching was specified. 
Perl has +changed in this respect, and PCRE2 has now changed to match. When caseless +matching is in force, Lu, Ll, and Lt (title case) are all treated as Lc (cased +letter). .P 18. From release 5.32.0, Perl locks out the use of \eK in lookaround assertions. From release 10.38 PCRE2 does the same by default. However, there @@ -196,6 +203,9 @@ and condition references such as (?(4)...). PCRE2 supports relative group numbers such as +2 and -4 in all three cases. Perl supports both plus and minus for subroutine calls, but only minus for back references, and no relative numbering at all for conditions. +.sp +(m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 extension +that is not available in Perl. .P 20. Perl has different limits than PCRE2. See the .\" HREF @@ -215,6 +225,16 @@ to "/a" could be selected by adding other PCRE2_EXTRA_ASCII* options on top. handled by PCRE2, either by the interpreter or the JIT. An example is /(?:|(?0)abcd)(?(R)|\ez)/, which matches a sequence of any number of repeated "abcd" substrings at the end of the subject. +.P +23. Both PCRE2 and Perl error when \ex{ escapes are invalid, but Perl tries to +recover and prints a warning if the problem was that an invalid hexadecimal +digit was found. Since PCRE2 doesn't have warnings, it returns an error instead. +Additionally, Perl accepts \ex{} and generates NUL, unlike PCRE2. +.P +24. From release 10.45, PCRE2 gives an error if \ex is not followed by a +hexadecimal digit or a curly bracket. It used to interpret this as the NUL +character. Perl still generates NUL, but warns when in warning mode in most +cases. . . .SH AUTHOR @@ -231,6 +251,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 30 November 2023 -Copyright (c) 1997-2023 University of Cambridge. +Last updated: 02 October 2024 +Copyright (c) 1997-2024 University of Cambridge.
.fi diff --git a/doc/pcre2convert.3 b/doc/pcre2convert.3 index 62c7ebb..4e6b7c5 100644 --- a/doc/pcre2convert.3 +++ b/doc/pcre2convert.3 @@ -1,4 +1,4 @@ -.TH PCRE2CONVERT 3 "28 June 2018" "PCRE2 10.32" +.TH PCRE2CONVERT 3 "14 November 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "EXPERIMENTAL PATTERN CONVERSION FUNCTIONS" @@ -159,6 +159,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 28 June 2018 +Last updated: 14 November 2023 Copyright (c) 1997-2018 University of Cambridge. .fi diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3 index 0453a94..43a6e30 100644 --- a/doc/pcre2demo.3 +++ b/doc/pcre2demo.3 @@ -1,4 +1,4 @@ -.TH PCRE2DEMO 3 " 7 June 2024" "PCRE2 10.44" +.TH PCRE2DEMO 3 "31 August 2021" "PCRE2 10.45-RC1" .\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT! .SH NAME PCRE2DEMO - A demonstration C program for PCRE2 diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 index ffe9d39..837a576 100644 --- a/doc/pcre2grep.1 +++ b/doc/pcre2grep.1 @@ -1,4 +1,4 @@ -.TH PCRE2GREP 1 "22 December 2023" "PCRE2 10.43" +.TH PCRE2GREP 1 "09 October 2024" "PCRE2 10.45-RC1" .SH NAME pcre2grep - a grep with Perl-compatible regular expressions. .SH SYNOPSIS @@ -337,9 +337,10 @@ Read patterns from the file, one per line. As is the case with patterns on the command line, no delimiters should be used. What constitutes a newline when reading the file is the operating system's default interpretation of \en. The \fB--newline\fP option has no effect on this option. Trailing white space is -removed from each line, and blank lines are ignored. An empty file contains no +removed from each line, and blank lines are ignored unless the +\fB--posix-pattern-file\fP option is also provided. An empty file contains no patterns and therefore matches nothing. Patterns read from a file in this way -may contain binary zeros, which are treated as ordinary data characters. +may contain binary zeros, which are treated as ordinary character literals. 
.sp If this option is given more than once, all the specified files are read. A data line is output if any of the patterns match it. A file name can be given @@ -628,9 +629,9 @@ contents of the matched part of the line and/or captured substrings into the text. .sp $<digits> or ${<digits>} is replaced by the captured substring of the given -decimal number; zero substitutes the whole match. If the number is greater than -the number of capturing substrings, or if the capture is unset, the replacement -is empty. +decimal number; $& (or the legacy $0) substitutes the whole match. If the +number is greater than the number of capturing substrings, or if the capture +is unset, the replacement is empty. .sp $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by newline; $r by carriage return; $t by tab; $v by vertical tab. @@ -701,6 +702,14 @@ option settings within patterns that affect individual classes. For example, when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while allowing \ew to match Unicode letters and digits. .TP +\fB--posix-pattern-file\fP +When patterns are provided with the \fB-f\fP option, do not trim trailing +spaces or ignore empty lines in a similar way to other grep tools. To keep +the behaviour consistent with older versions, if the pattern read was +terminated with CRLF (as character literals) then both characters won't be +included as part of it, so if you really need to have a pattern ending in '\er', +use an escape sequence or provide it by a different method. +.TP \fB-q\fP, \fB--quiet\fP Work quietly, that is, display nothing except error messages. The exit status indicates whether or not any matches were found. @@ -882,7 +891,7 @@ scripts or echoing specific strings during matching by making use of PCRE2's callout facility. However, this support can be completely or partially disabled when \fBpcre2grep\fP is built.
You can find out whether your binary has support for callouts by running it with the \fB--help\fP option. If callout support is -completely disabled, all callouts in patterns are ignored by \fBpcre2grep\fP. +completely disabled, callouts in patterns are forbidden by \fBpcre2grep\fP. If the facility is partially disabled, calling external programs is not supported, and callouts that request it are ignored. .P @@ -905,9 +914,9 @@ available, provided that callouts were not completely disabled when zero-terminated string, which means it should not contain any internal binary zeros. It is written to the output, having first been passed through the same escape processing as text from the \fB--output\fP (\fB-O\fP) option (see -above). However, $0 cannot be used to insert a matched substring because the -match is still in progress. Instead, the single character '0' is inserted. Any -syntax errors in the string (for example, a dollar not followed by another +above). However, $0 or $& cannot be used to insert a matched substring because +the match is still in progress. Instead, the single character '0' is inserted. +Any syntax errors in the string (for example, a dollar not followed by another character) causes the callout to be ignored. No terminator is added to the output string, so if you want a newline, you must include it explicitly using the escape $n. For example: @@ -936,9 +945,9 @@ arguments: .sp Any substring (including the executable name) may contain escape sequences started by a dollar character. These are the same as for the \fB--output\fP -(\fB-O\fP) option documented above, except that $0 cannot insert the matched -string because the match is still in progress. Instead, the character '0' -is inserted. If you need a literal dollar or pipe character in any +(\fB-O\fP) option documented above, except that $0 or $& cannot insert the +matched string because the match is still in progress. Instead, the character +'0' is inserted. 
If you need a literal dollar or pipe character in any substring, use $$ or $| respectively. Here is an example: .sp echo -e "abcde\en12345" | pcre2grep \e @@ -1013,6 +1022,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 22 December 2023 +Last updated: 09 October 2024 Copyright (c) 1997-2023 University of Cambridge. .fi diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt index 7914c45..f91f654 100644 --- a/doc/pcre2grep.txt +++ b/doc/pcre2grep.txt @@ -1,4 +1,3 @@ - PCRE2GREP(1) General Commands Manual PCRE2GREP(1) @@ -366,139 +365,140 @@ OPTIONS used. What constitutes a newline when reading the file is the operating system's default interpretation of \n. The --new- line option has no effect on this option. Trailing white - space is removed from each line, and blank lines are ignored. - An empty file contains no patterns and therefore matches - nothing. Patterns read from a file in this way may contain - binary zeros, which are treated as ordinary data characters. - - If this option is given more than once, all the specified - files are read. A data line is output if any of the patterns - match it. A file name can be given as "-" to refer to the - standard input. When -f is used, patterns specified on the - command line using -e may also be present; they are matched + space is removed from each line, and blank lines are ignored + unless the --posix-pattern-file option is also provided. An + empty file contains no patterns and therefore matches noth- + ing. Patterns read from a file in this way may contain binary + zeros, which are treated as ordinary character literals. + + If this option is given more than once, all the specified + files are read. A data line is output if any of the patterns + match it. A file name can be given as "-" to refer to the + standard input. When -f is used, patterns specified on the + command line using -e may also be present; they are matched before the file's patterns. 
However, no pattern is taken from - the command line; all arguments are treated as the names of + the command line; all arguments are treated as the names of paths to be searched. --file-list=filename - Read a list of files and/or directories that are to be + Read a list of files and/or directories that are to be scanned from the given file, one per line. What constitutes a - newline when reading the file is the operating system's de- - fault. Trailing white space is removed from each line, and + newline when reading the file is the operating system's de- + fault. Trailing white space is removed from each line, and blank lines are ignored. These paths are processed before any - that are listed on the command line. The file name can be - given as "-" to refer to the standard input. If --file and - --file-list are both specified as "-", patterns are read - first. This is useful only when the standard input is a ter- - minal, from which further lines (the list of files) can be + that are listed on the command line. The file name can be + given as "-" to refer to the standard input. If --file and + --file-list are both specified as "-", patterns are read + first. This is useful only when the standard input is a ter- + minal, from which further lines (the list of files) can be read after an end-of-file indication. If this option is given more than once, all the specified files are read. --file-offsets - Instead of showing lines or parts of lines that match, show - each match as an offset from the start of the file and a - length, separated by a comma. In this mode, --colour has no - effect, and no context is shown. That is, the -A, -B, and -C - options are ignored. If there is more than one match in a - line, each of them is shown separately. 
This option is mutu- - ally exclusive with --output, --line-offsets, and --only- + Instead of showing lines or parts of lines that match, show + each match as an offset from the start of the file and a + length, separated by a comma. In this mode, --colour has no + effect, and no context is shown. That is, the -A, -B, and -C + options are ignored. If there is more than one match in a + line, each of them is shown separately. This option is mutu- + ally exclusive with --output, --line-offsets, and --only- matching. --group-separator=text Output this text string instead of two hyphens between groups - of lines when -A, -B, or -C is in use. See also --no-group- + of lines when -A, -B, or -C is in use. See also --no-group- separator. -H, --with-filename - Force the inclusion of the file name at the start of output + Force the inclusion of the file name at the start of output lines when searching a single file. The file name is not nor- - mally shown in this case. By default, for matching lines, - the file name is followed by a colon; for context lines, a + mally shown in this case. By default, for matching lines, + the file name is followed by a colon; for context lines, a hyphen separator is used. The -Z option can be used to change the terminator to a zero byte. If a line number is also being output, it follows the file name. When the -M option causes a - pattern to match more than one line, only the first is pre- - ceded by the file name. This option overrides any previous + pattern to match more than one line, only the first is pre- + ceded by the file name. This option overrides any previous -h, -l, or -L options. -h, --no-filename Suppress the output file names when searching multiple files. File names are normally shown when multiple files are - searched. By default, for matching lines, the file name is + searched. By default, for matching lines, the file name is followed by a colon; for context lines, a hyphen separator is used. 
The -Z option can be used to change the terminator to a - zero byte. If a line number is also being output, it follows + zero byte. If a line number is also being output, it follows the file name. This option overrides any previous -H, -L, or -l options. --heap-limit=number See --match-limit below. - --help Output a help message, giving brief details of the command - options and file type support, and then exit. Anything else + --help Output a help message, giving brief details of the command + options and file type support, and then exit. Anything else on the command line is ignored. - -I Ignore binary files. This is equivalent to --binary- + -I Ignore binary files. This is equivalent to --binary- files=without-match. -i, --ignore-case - Ignore upper/lower case distinctions when pattern matching. + Ignore upper/lower case distinctions when pattern matching. This applies when matching path names for inclusion or exclu- sion as well as when matching lines in files. --include=pattern - If any --include patterns are specified, the only files that + If any --include patterns are specified, the only files that are processed are those whose names match one of the patterns - and do not match an --exclude pattern. This option does not - affect directories, but it applies to all files, whether - listed on the command line, obtained from --file-list, or by - scanning a directory. The pattern is a PCRE2 regular expres- - sion, and is matched against the final component of the file - name, not the entire path. The -F, -w, and -x options do not - apply to this pattern. The option may be given any number of - times. If a file name matches both an --include and an --ex- - clude pattern, it is excluded. There is no short form for + and do not match an --exclude pattern. This option does not + affect directories, but it applies to all files, whether + listed on the command line, obtained from --file-list, or by + scanning a directory. 
The pattern is a PCRE2 regular expres- + sion, and is matched against the final component of the file + name, not the entire path. The -F, -w, and -x options do not + apply to this pattern. The option may be given any number of + times. If a file name matches both an --include and an --ex- + clude pattern, it is excluded. There is no short form for this option. --include-from=filename - Treat each non-empty line of the file as the data for an + Treat each non-empty line of the file as the data for an --include option. What constitutes a newline for this purpose - is the operating system's default. The --newline option has + is the operating system's default. The --newline option has no effect on this option. This option may be given any number of times; all the files are read. --include-dir=pattern - If any --include-dir patterns are specified, the only direc- - tories that are processed are those whose names match one of - the patterns and do not match an --exclude-dir pattern. This - applies to all directories, whether listed on the command - line, obtained from --file-list, or by scanning a parent di- - rectory. The pattern is a PCRE2 regular expression, and is - matched against the final component of the directory name, - not the entire path. The -F, -w, and -x options do not apply + If any --include-dir patterns are specified, the only direc- + tories that are processed are those whose names match one of + the patterns and do not match an --exclude-dir pattern. This + applies to all directories, whether listed on the command + line, obtained from --file-list, or by scanning a parent di- + rectory. The pattern is a PCRE2 regular expression, and is + matched against the final component of the directory name, + not the entire path. The -F, -w, and -x options do not apply to this pattern. The option may be given any number of times. 
- If a directory matches both --include-dir and --exclude-dir, + If a directory matches both --include-dir and --exclude-dir, it is excluded. There is no short form for this option. -L, --files-without-match - Instead of outputting lines from the files, just output the - names of the files that do not contain any lines that would - have been output. Each file name is output once, on a sepa- - rate line by default, but if the -Z option is set, they are - separated by zero bytes instead of newlines. This option + Instead of outputting lines from the files, just output the + names of the files that do not contain any lines that would + have been output. Each file name is output once, on a sepa- + rate line by default, but if the -Z option is set, they are + separated by zero bytes instead of newlines. This option overrides any previous -H, -h, or -l options. -l, --files-with-matches - Instead of outputting lines from the files, just output the + Instead of outputting lines from the files, just output the names of the files containing lines that would have been out- - put. Each file name is output once, on a separate line, but + put. Each file name is output once, on a separate line, but if the -Z option is set, they are separated by zero bytes in- - stead of newlines. Searching normally stops as soon as a - matching line is found in a file. However, if the -c (count) - option is also used, matching continues in order to obtain - the correct count, and those files that have at least one - match are listed along with their counts. Using this option - with -c is a way of suppressing the listing of files with no + stead of newlines. Searching normally stops as soon as a + matching line is found in a file. However, if the -c (count) + option is also used, matching continues in order to obtain + the correct count, and those files that have at least one + match are listed along with their counts. 
Using this option + with -c is a way of suppressing the listing of files with no matches that occurs with -c on its own. This option overrides any previous -H, -h, or -L options. @@ -508,130 +508,130 @@ OPTIONS input)" is used. There is no short form for this option. --line-buffered - When this option is given, non-compressed input is read and - processed line by line, and the output is flushed after each - write. By default, input is read in large chunks, unless - pcre2grep can determine that it is reading from a terminal, + When this option is given, non-compressed input is read and + processed line by line, and the output is flushed after each + write. By default, input is read in large chunks, unless + pcre2grep can determine that it is reading from a terminal, which is currently possible only in Unix-like environments or Windows. Output to terminal is normally automatically flushed - by the operating system. This option can be useful when the - input or output is attached to a pipe and you do not want - pcre2grep to buffer up large amounts of data. However, its - use will affect performance, and the -M (multiline) option - ceases to work. When input is from a compressed .gz or .bz2 + by the operating system. This option can be useful when the + input or output is attached to a pipe and you do not want + pcre2grep to buffer up large amounts of data. However, its + use will affect performance, and the -M (multiline) option + ceases to work. When input is from a compressed .gz or .bz2 file, --line-buffered is ignored. --line-offsets - Instead of showing lines or parts of lines that match, show + Instead of showing lines or parts of lines that match, show each match as a line number, the offset from the start of the - line, and a length. The line number is terminated by a colon - (as usual; see the -n option), and the offset and length are - separated by a comma. In this mode, --colour has no effect, - and no context is shown. 
That is, the -A, -B, and -C options - are ignored. If there is more than one match in a line, each - of them is shown separately. This option is mutually exclu- + line, and a length. The line number is terminated by a colon + (as usual; see the -n option), and the offset and length are + separated by a comma. In this mode, --colour has no effect, + and no context is shown. That is, the -A, -B, and -C options + are ignored. If there is more than one match in a line, each + of them is shown separately. This option is mutually exclu- sive with --output, --file-offsets, and --only-matching. --locale=locale-name - This option specifies a locale to be used for pattern match- - ing. It overrides the value in the LC_ALL or LC_CTYPE envi- - ronment variables. If no locale is specified, the PCRE2 li- + This option specifies a locale to be used for pattern match- + ing. It overrides the value in the LC_ALL or LC_CTYPE envi- + ronment variables. If no locale is specified, the PCRE2 li- brary's default (usually the "C" locale) is used. There is no short form for this option. -M, --multiline - Allow patterns to match more than one line. When this option - is set, the PCRE2 library is called in "multiline" mode, and - a match is allowed to continue past the end of the initial + Allow patterns to match more than one line. When this option + is set, the PCRE2 library is called in "multiline" mode, and + a match is allowed to continue past the end of the initial line and onto one or more subsequent lines. - Patterns used with -M may usefully contain literal newline - characters and internal occurrences of ^ and $ characters, - because in multiline mode these can match at internal new- - lines. 
Because pcre2grep is scanning multiple lines, the \Z - and \z assertions match only at the end of the last line in + Patterns used with -M may usefully contain literal newline + characters and internal occurrences of ^ and $ characters, + because in multiline mode these can match at internal new- + lines. Because pcre2grep is scanning multiple lines, the \Z + and \z assertions match only at the end of the last line in the file. The \A assertion matches at the start of the first - line of a match. This can be any line in the file; it is not + line of a match. This can be any line in the file; it is not anchored to the first line. - The output for a successful match may consist of more than - one line. The first line is the line in which the match - started, and the last line is the line in which the match - ended. If the matched string ends with a newline sequence, - the output ends at the end of that line. If -v is set, none - of the lines in a multi-line match are output. Once a match - has been handled, scanning restarts at the beginning of the + The output for a successful match may consist of more than + one line. The first line is the line in which the match + started, and the last line is the line in which the match + ended. If the matched string ends with a newline sequence, + the output ends at the end of that line. If -v is set, none + of the lines in a multi-line match are output. Once a match + has been handled, scanning restarts at the beginning of the line after the one in which the match ended. - The newline sequence that separates multiple lines must be - matched as part of the pattern. For example, to find the - phrase "regular expression" in a file where "regular" might - be at the end of a line and "expression" at the start of the + The newline sequence that separates multiple lines must be + matched as part of the pattern. 
For example, to find the + phrase "regular expression" in a file where "regular" might + be at the end of a line and "expression" at the start of the next line, you could use this command: pcre2grep -M 'regular\s+expression' The \s escape sequence matches any white space character, in- - cluding newlines, and is followed by + so as to match trail- - ing white space on the first line as well as possibly han- + cluding newlines, and is followed by + so as to match trail- + ing white space on the first line as well as possibly han- dling a two-character newline sequence. - There is a limit to the number of lines that can be matched, - imposed by the way that pcre2grep buffers the input file as - it scans it. With a sufficiently large processing buffer, + There is a limit to the number of lines that can be matched, + imposed by the way that pcre2grep buffers the input file as + it scans it. With a sufficiently large processing buffer, this should not be a problem. - The -M option does not work when input is read line by line + The -M option does not work when input is read line by line (see --line-buffered.) -m number, --max-count=number - Stop processing after finding number matching lines, or non- - matching lines if -v is also set. Any trailing context lines - are output after the final match. In multiline mode, each - multiline match counts as just one line for this purpose. If - this limit is reached when reading the standard input from a + Stop processing after finding number matching lines, or non- + matching lines if -v is also set. Any trailing context lines + are output after the final match. In multiline mode, each + multiline match counts as just one line for this purpose. If + this limit is reached when reading the standard input from a regular file, the file is left positioned just after the last - matching line. If -c is also set, the count that is output - is never greater than number. This option has no effect if + matching line. 
If -c is also set, the count that is output + is never greater than number. This option has no effect if used with -L, -l, or -q, or when just checking for a match in a binary file. --match-limit=number - Processing some regular expression patterns may take a very + Processing some regular expression patterns may take a very long time to search for all possible matching strings. Others - may require a very large amount of memory. There are three + may require a very large amount of memory. There are three options that set resource limits for matching. The --match-limit option provides a means of limiting comput- - ing resource usage when processing patterns that are not go- + ing resource usage when processing patterns that are not go- ing to match, but which have a very large number of possibil- ities in their search trees. The classic example is a pattern - that uses nested unlimited repeats. Internally, PCRE2 has a - counter that is incremented each time around its main pro- - cessing loop. If the value set by --match-limit is reached, + that uses nested unlimited repeats. Internally, PCRE2 has a + counter that is incremented each time around its main pro- + cessing loop. If the value set by --match-limit is reached, an error occurs. - The --heap-limit option specifies, as a number of kibibytes + The --heap-limit option specifies, as a number of kibibytes (units of 1024 bytes), the maximum amount of heap memory that may be used for matching. - The --depth-limit option limits the depth of nested back- + The --depth-limit option limits the depth of nested back- tracking points, which indirectly limits the amount of memory that is used. The amount of memory needed for each backtrack- - ing point depends on the number of capturing parentheses in + ing point depends on the number of capturing parentheses in the pattern, so the amount of memory that is used before this - limit acts varies from pattern to pattern. 
This limit is of + limit acts varies from pattern to pattern. This limit is of use only if it is set smaller than --match-limit. - There are no short forms for these options. The default lim- - its can be set when the PCRE2 library is compiled; if they - are not specified, the defaults are very large and so effec- + There are no short forms for these options. The default lim- + its can be set when the PCRE2 library is compiled; if they + are not specified, the defaults are very large and so effec- tively unlimited. --max-buffer-size=number - This limits the expansion of the processing buffer, whose - initial size can be set by --buffer-size. The maximum buffer - size is silently forced to be no smaller than the starting + This limits the expansion of the processing buffer, whose + initial size can be set by --buffer-size. The maximum buffer + size is silently forced to be no smaller than the starting buffer size. -N newline-type, --newline=newline-type @@ -640,72 +640,72 @@ OPTIONS pcre2grep -N CRLF 'some pattern' - The newline type may be specified in upper, lower, or mixed - case. If the newline type is NUL, lines are separated by bi- - nary zero characters. The other types are the single-charac- - ter sequences CR (carriage return) and LF (linefeed), the - two-character sequence CRLF, an "anycrlf" type, which recog- - nizes any of the preceding three types, and an "any" type, - for which any Unicode line ending sequence is assumed to end - a line. The Unicode sequences are the three just mentioned, - plus VT (vertical tab, U+000B), FF (form feed, U+000C), NEL - (next line, U+0085), LS (line separator, U+2028), and PS + The newline type may be specified in upper, lower, or mixed + case. If the newline type is NUL, lines are separated by bi- + nary zero characters. 
The other types are the single-charac- + ter sequences CR (carriage return) and LF (linefeed), the + two-character sequence CRLF, an "anycrlf" type, which recog- + nizes any of the preceding three types, and an "any" type, + for which any Unicode line ending sequence is assumed to end + a line. The Unicode sequences are the three just mentioned, + plus VT (vertical tab, U+000B), FF (form feed, U+000C), NEL + (next line, U+0085), LS (line separator, U+2028), and PS (paragraph separator, U+2029). - When the PCRE2 library is built, a default line-ending se- - quence is specified. This is normally the standard sequence - for the operating system. Unless otherwise specified by this + When the PCRE2 library is built, a default line-ending se- + quence is specified. This is normally the standard sequence + for the operating system. Unless otherwise specified by this option, pcre2grep uses the library's default. - This option makes it possible to use pcre2grep to scan files + This option makes it possible to use pcre2grep to scan files that have come from other environments without having to mod- - ify their line endings. If the data that is being scanned - does not agree with the convention set by this option, - pcre2grep may behave in strange ways. Note that this option - does not apply to files specified by the -f, --exclude-from, - or --include-from options, which are expected to use the op- + ify their line endings. If the data that is being scanned + does not agree with the convention set by this option, + pcre2grep may behave in strange ways. Note that this option + does not apply to files specified by the -f, --exclude-from, + or --include-from options, which are expected to use the op- erating system's standard newline sequence. -n, --line-number Precede each output line by its line number in the file, fol- - lowed by a colon for matching lines or a hyphen for context + lowed by a colon for matching lines or a hyphen for context lines. 
If the file name is also being output, it precedes the - line number. When the -M option causes a pattern to match - more than one line, only the first is preceded by its line + line number. When the -M option causes a pattern to match + more than one line, only the first is preceded by its line number. This option is forced if --line-offsets is used. --no-group-separator - Do not output a separator between groups of lines when -A, + Do not output a separator between groups of lines when -A, -B, or -C is in use. The default is to output a line contain- ing two hyphens. See also --group-separator. - --no-jit If the PCRE2 library is built with support for just-in-time + --no-jit If the PCRE2 library is built with support for just-in-time compiling (which speeds up matching), pcre2grep automatically makes use of this, unless it was explicitly disabled at build - time. This option can be used to disable the use of JIT at + time. This option can be used to disable the use of JIT at run time. It is provided for testing and working around prob- lems. It should never be needed in normal use. -O text, --output=text - When there is a match, instead of outputting the line that - matched, output just the text specified in this option, fol- - lowed by an operating-system standard newline. In this mode, - --colour has no effect, and no context is shown. That is, - the -A, -B, and -C options are ignored. The --newline option - has no effect on this option, which is mutually exclusive + When there is a match, instead of outputting the line that + matched, output just the text specified in this option, fol- + lowed by an operating-system standard newline. In this mode, + --colour has no effect, and no context is shown. That is, + the -A, -B, and -C options are ignored. The --newline option + has no effect on this option, which is mutually exclusive with --only-matching, --file-offsets, and --line-offsets. 
- However, like --only-matching, if there is more than one + However, like --only-matching, if there is more than one match in a line, each of them causes a line of output. Escape sequences starting with a dollar character may be used to insert the contents of the matched part of the line and/or captured substrings into the text. - $<digits> or ${<digits>} is replaced by the captured sub- - string of the given decimal number; zero substitutes the - whole match. If the number is greater than the number of cap- - turing substrings, or if the capture is unset, the replace- - ment is empty. + $<digits> or ${<digits>} is replaced by the captured sub- + string of the given decimal number; $& (or the legacy $0) + substitutes the whole match. If the number is greater than + the number of capturing substrings, or if the capture is un- + set, the replacement is empty. $a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by newline; $r by carriage return; $t by tab; @@ -787,93 +787,103 @@ OPTIONS mode, the sequence (?aP) restricts [:word:] to ASCII letters, while allowing \w to match Unicode letters and digits. + --posix-pattern-file + When patterns are provided with the -f option, do not trim + trailing spaces or ignore empty lines in a similar way to + other grep tools. To keep the behaviour consistent with older + versions, if the pattern read was terminated with CRLF (as + character literals) then both characters won't be included as + part of it, so if you really need to have a pattern ending in + '\r', use an escape sequence or provide it by a different + method. + -q, --quiet Work quietly, that is, display nothing except error messages. - The exit status indicates whether or not any matches were + The exit status indicates whether or not any matches were found. -r, --recursive - If any given path is a directory, recursively scan the files - it contains, taking note of any --include and --exclude set- - tings. + If any given path is a directory, recursively scan the files + it contains, taking note of any --include and --exclude set- + tings.
By default, a directory is read as a normal file; in - some operating systems this gives an immediate end-of-file. - This option is a shorthand for setting the -d option to "re- + If any given path is a directory, recursively scan the files + it contains, taking note of any --include and --exclude set- + tings. By default, a directory is read as a normal file; in + some operating systems this gives an immediate end-of-file. + This option is a shorthand for setting the -d option to "re- curse". --recursion-limit=number - This is an obsolete synonym for --depth-limit. See --match- + This is an obsolete synonym for --depth-limit. See --match- limit above for details. -s, --no-messages - Suppress error messages about non-existent or unreadable - files. Such files are quietly skipped. However, the return + Suppress error messages about non-existent or unreadable + files. Such files are quietly skipped. However, the return code is still 2, even if matches were found in other files. -t, --total-count - This option is useful when scanning more than one file. If - used on its own, -t suppresses all output except for a grand - total number of matching lines (or non-matching lines if -v + This option is useful when scanning more than one file. If + used on its own, -t suppresses all output except for a grand + total number of matching lines (or non-matching lines if -v is used) in all the files. If -t is used with -c, a grand to- - tal is output except when the previous output is just one - line. In other words, it is not output when just one file's - count is listed. If file names are being output, the grand - total is preceded by "TOTAL:". Otherwise, it appears as just - another number. The -t option is ignored when used with -L - (list files without matches), because the grand total would + tal is output except when the previous output is just one + line. In other words, it is not output when just one file's + count is listed. 
If file names are being output, the grand + total is preceded by "TOTAL:". Otherwise, it appears as just + another number. The -t option is ignored when used with -L + (list files without matches), because the grand total would always be zero. -u, --utf Operate in UTF/Unicode mode. This option is available only if PCRE2 has been compiled with UTF-8 support. All patterns (in- - cluding those for any --exclude and --include options) and - all lines that are scanned must be valid strings of UTF-8 + cluding those for any --exclude and --include options) and + all lines that are scanned must be valid strings of UTF-8 characters. If an invalid UTF-8 string is encountered, an er- ror occurs. -U, --utf-allow-invalid - As --utf, but in addition subject lines may contain invalid - UTF-8 code unit sequences. These can never form part of any - pattern match. Patterns themselves, however, must still be + As --utf, but in addition subject lines may contain invalid + UTF-8 code unit sequences. These can never form part of any + pattern match. Patterns themselves, however, must still be valid UTF-8 strings. This facility allows valid UTF-8 strings to be sought within arbitrary byte sequences in executable or - other binary files. For more details about matching in non- + other binary files. For more details about matching in non- valid UTF-8 strings, see the pcre2unicode(3) documentation. -V, --version - Write the version numbers of pcre2grep and the PCRE2 library - to the standard output and then exit. Anything else on the + Write the version numbers of pcre2grep and the PCRE2 library + to the standard output and then exit. Anything else on the command line is ignored. -v, --invert-match - Invert the sense of the match, so that lines which do not - match any of the patterns are the ones that are found. 
When - this option is set, options such as --only-matching and - --output, which specify parts of a match that are to be out- + Invert the sense of the match, so that lines which do not + match any of the patterns are the ones that are found. When + this option is set, options such as --only-matching and + --output, which specify parts of a match that are to be out- put, are ignored. -w, --word-regex, --word-regexp Force the patterns only to match "words". That is, there must - be a word boundary at the start and end of each matched - string. This is equivalent to having "\b(?:" at the start of - each pattern, and ")\b" at the end. This option applies only - to the patterns that are matched against the contents of - files; it does not apply to patterns specified by any of the + be a word boundary at the start and end of each matched + string. This is equivalent to having "\b(?:" at the start of + each pattern, and ")\b" at the end. This option applies only + to the patterns that are matched against the contents of + files; it does not apply to patterns specified by any of the --include or --exclude options. -x, --line-regex, --line-regexp - Force the patterns to start matching only at the beginnings - of lines, and in addition, require them to match entire + Force the patterns to start matching only at the beginnings + of lines, and in addition, require them to match entire lines. In multiline mode the match may be more than one line. This is equivalent to having "^(?:" at the start of each pat- - tern and ")$" at the end. This option applies only to the - patterns that are matched against the contents of files; it - does not apply to patterns specified by any of the --include + tern and ")$" at the end. This option applies only to the + patterns that are matched against the contents of files; it + does not apply to patterns specified by any of the --include or --exclude options. 
-Z, --null - Terminate files names in the regular output with a zero byte - (the NUL character) instead of what would normally appear. - This is useful when file names contain unusual characters - such as colons, hyphens, or even newlines. The option does + Terminate file names in the regular output with a zero byte + (the NUL character) instead of what would normally appear. + This is useful when file names contain unusual characters + such as colons, hyphens, or even newlines. The option does not apply to file names in error messages. @@ -887,90 +897,90 @@ ENVIRONMENT VARIABLES NEWLINES - The -N (--newline) option allows pcre2grep to scan files with newline - conventions that differ from the default. This option affects only the - way scanned files are processed. It does not affect the interpretation - of files specified by the -f, --file-list, --exclude-from, or --in- + The -N (--newline) option allows pcre2grep to scan files with newline + conventions that differ from the default. This option affects only the + way scanned files are processed. It does not affect the interpretation + of files specified by the -f, --file-list, --exclude-from, or --in- clude-from options. - Any parts of the scanned input files that are written to the standard - output are copied with whatever newline sequences they have in the in- - put. However, if the final line of a file is output, and it does not - end with a newline sequence, a newline sequence is added. If the new- - line setting is CR, LF, CRLF or NUL, that line ending is output; for + Any parts of the scanned input files that are written to the standard + output are copied with whatever newline sequences they have in the in- + put. However, if the final line of a file is output, and it does not + end with a newline sequence, a newline sequence is added. If the new- + line setting is CR, LF, CRLF or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a single NL is used.
- The newline setting does not affect the way in which pcre2grep writes - newlines in informational messages to the standard output and error - streams. Under Windows, the standard output is set to be binary, so - that "\r\n" at the ends of output lines that are copied from the input - is not converted to "\r\r\n" by the C I/O library. This means that any - messages written to the standard output must end with "\r\n". For all - other operating systems, and for all messages to the standard error + The newline setting does not affect the way in which pcre2grep writes + newlines in informational messages to the standard output and error + streams. Under Windows, the standard output is set to be binary, so + that "\r\n" at the ends of output lines that are copied from the input + is not converted to "\r\r\n" by the C I/O library. This means that any + messages written to the standard output must end with "\r\n". For all + other operating systems, and for all messages to the standard error stream, "\n" is used. OPTIONS COMPATIBILITY WITH GNU GREP Many of the short and long forms of pcre2grep's options are the same as - in the GNU grep program. Any long option of the form --xxx-regexp (GNU - terminology) is also available as --xxx-regex (PCRE2 terminology). - However, the --case-restrict, --depth-limit, -E, --file-list, --file- + in the GNU grep program. Any long option of the form --xxx-regexp (GNU + terminology) is also available as --xxx-regex (PCRE2 terminology). 
+ However, the --case-restrict, --depth-limit, -E, --file-list, --file- offsets, --heap-limit, --include-dir, --line-offsets, --locale, - --match-limit, -M, --multiline, -N, --newline, --no-ucp, --om-separa- - tor, --output, -P, -u, --utf, -U, and --utf-allow-invalid options are + --match-limit, -M, --multiline, -N, --newline, --no-ucp, --om-separa- + tor, --output, -P, -u, --utf, -U, and --utf-allow-invalid options are specific to pcre2grep, as is the use of the --only-matching option with a capturing parentheses number. - Although most of the common options work the same way, a few are dif- - ferent in pcre2grep. For example, the --include option's argument is a + Although most of the common options work the same way, a few are dif- + ferent in pcre2grep. For example, the --include option's argument is a glob for GNU grep, but in pcre2grep it is a regular expression to which - the -i option applies. If both the -c and -l options are given, GNU - grep lists only file names, without counts, but pcre2grep gives the + the -i option applies. If both the -c and -l options are given, GNU + grep lists only file names, without counts, but pcre2grep gives the counts as well. OPTIONS WITH DATA There are four different ways in which an option with data can be spec- - ified. If a short form option is used, the data may follow immedi- + ified. If a short form option is used, the data may follow immedi- ately, or (with one exception) in the next command line item. For exam- ple: -f/some/file -f /some/file - The exception is the -o option, which may appear with or without data. - Because of this, if data is present, it must follow immediately in the + The exception is the -o option, which may appear with or without data. + Because of this, if data is present, it must follow immediately in the same item, for example -o3. 
- If a long form option is used, the data may appear in the same command - line item, separated by an equals character, or (with two exceptions) + If a long form option is used, the data may appear in the same command + line item, separated by an equals character, or (with two exceptions) it may appear in the next command line item. For example: --file=/some/file --file /some/file - Note, however, that if you want to supply a file name beginning with ~ - as data in a shell command, and have the shell expand ~ to a home di- - rectory, you must separate the file name from the option, because the + Note, however, that if you want to supply a file name beginning with ~ + as data in a shell command, and have the shell expand ~ to a home di- + rectory, you must separate the file name from the option, because the shell does not treat ~ specially unless it is at the start of an item. - The exceptions to the above are the --colour (or --color) and --only- - matching options, for which the data is optional. If one of these op- - tions does have data, it must be given in the first form, using an + The exceptions to the above are the --colour (or --color) and --only- + matching options, for which the data is optional. If one of these op- + tions does have data, it must be given in the first form, using an equals character. Otherwise pcre2grep will assume that it has no data. USING PCRE2'S CALLOUT FACILITY - pcre2grep has, by default, support for calling external programs or - scripts or echoing specific strings during matching by making use of - PCRE2's callout facility. However, this support can be completely or - partially disabled when pcre2grep is built. You can find out whether - your binary has support for callouts by running it with the --help op- - tion. If callout support is completely disabled, all callouts in pat- - terns are ignored by pcre2grep. 
If the facility is partially disabled, + pcre2grep has, by default, support for calling external programs or + scripts or echoing specific strings during matching by making use of + PCRE2's callout facility. However, this support can be completely or + partially disabled when pcre2grep is built. You can find out whether + your binary has support for callouts by running it with the --help op- + tion. If callout support is completely disabled, callouts in patterns + are forbidden by pcre2grep. If the facility is partially disabled, calling external programs is not supported, and callouts that request it are ignored. @@ -988,13 +998,13 @@ USING PCRE2'S CALLOUT FACILITY processed as a zero-terminated string, which means it should not con- tain any internal binary zeros. It is written to the output, having first been passed through the same escape processing as text from the - --output (-O) option (see above). However, $0 cannot be used to insert - a matched substring because the match is still in progress. Instead, - the single character '0' is inserted. Any syntax errors in the string - (for example, a dollar not followed by another character) causes the - callout to be ignored. No terminator is added to the output string, so - if you want a newline, you must include it explicitly using the escape - $n. For example: + --output (-O) option (see above). However, $0 or $& cannot be used to + insert a matched substring because the match is still in progress. In- + stead, the single character '0' is inserted. Any syntax errors in the + string (for example, a dollar not followed by another character) causes + the callout to be ignored. No terminator is added to the output string, + so if you want a newline, you must include it explicitly using the es- + cape $n. 
For example: pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' @@ -1018,10 +1028,10 @@ USING PCRE2'S CALLOUT FACILITY Any substring (including the executable name) may contain escape se- quences started by a dollar character. These are the same as for the - --output (-O) option documented above, except that $0 cannot insert the - matched string because the match is still in progress. Instead, the - character '0' is inserted. If you need a literal dollar or pipe charac- - ter in any substring, use $$ or $| respectively. Here is an example: + --output (-O) option documented above, except that $0 or $& cannot in- + sert the matched string because the match is still in progress. In- + stead, the character '0' is inserted. If you need a literal dollar or pipe character in any substring, use $$ or $| respectively. Here is an + example: echo -e "abcde\n12345" | pcre2grep \ '(?x)(.)(..(.)) @@ -1088,8 +1098,8 @@ AUTHOR REVISION - Last updated: 22 December 2023 + Last updated: 09 October 2024 Copyright (c) 1997-2023 University of Cambridge. -PCRE2 10.43 22 December 2023 PCRE2GREP(1) +PCRE2 10.45-RC1 09 October 2024 PCRE2GREP(1) diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3 index 8798089..d0c8452 100644 --- a/doc/pcre2jit.3 +++ b/doc/pcre2jit.3 @@ -1,4 +1,4 @@ -.TH PCRE2JIT 3 "21 February 2024" "PCRE2 10.43" +.TH PCRE2JIT 3 "22 August 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT" @@ -38,7 +38,7 @@ platforms: .sp If --enable-jit is set on an unsupported platform, compilation fails. .P -A client program can tell if JIT support is available by calling +A client program can tell if JIT support has been compiled by calling \fBpcre2_config()\fP with the PCRE2_CONFIG_JIT option. The result is one if PCRE2 was built with JIT support, and zero otherwise. However, having the JIT code available does not guarantee that it will be used for any particular @@ -49,10 +49,17 @@ items that are not supported by JIT .\" (see below).
Another reason is that in some environments JIT is unable to get -memory in which to build its compiled code. The only guarantee from +executable memory in which to build its compiled code. The only guarantee from \fBpcre2_config()\fP is that if it returns zero, JIT will definitely \fInot\fP be used. .P +As of release 10.45 there is a more informative way to test for JIT support. If +\fBpcre2_jit_compile()\fP is called with the single option PCRE2_JIT_TEST_ALLOC +it returns zero if JIT is available and has a working allocator. Otherwise it +returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate executable +memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not compiled. The +code argument is ignored, so it can be a NULL value. +.P A simple program does not need to check availability in order to use JIT when possible. The API is implemented in a way that falls back to the interpretive code if JIT is not available or cannot be used for a given match. For programs @@ -103,7 +110,8 @@ option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore PCRE2_JIT_COMPLETE and just compile code for partial matching. If \fBpcre2_jit_compile()\fP is called with no option bits set, it immediately -returns zero. This is an alternative way of testing whether JIT is available. +returns zero. This is an alternative way of testing whether JIT support has +been compiled. .P At present, it is not possible to free JIT compiled code except when the entire compiled pattern is freed by calling \fBpcre2_code_free()\fP. @@ -476,6 +484,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 21 February 2024 +Last updated: 22 August 2024 Copyright (c) 1997-2024 University of Cambridge.
.fi diff --git a/doc/pcre2limits.3 b/doc/pcre2limits.3 index a1ea3ea..5b794ec 100644 --- a/doc/pcre2limits.3 +++ b/doc/pcre2limits.3 @@ -1,4 +1,4 @@ -.TH PCRE2LIMITS 3 "1 August 2023" "PCRE2 10.43" +.TH PCRE2LIMITS 3 "16 August 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "SIZE AND OTHER LIMITATIONS" @@ -76,6 +76,6 @@ Cambridge, England. .rs .sp .nf -Last updated: August 2023 +Last updated: 16 August 2023 Copyright (c) 1997-2023 University of Cambridge. .fi diff --git a/doc/pcre2matching.3 b/doc/pcre2matching.3 index 96800ef..f556c04 100644 --- a/doc/pcre2matching.3 +++ b/doc/pcre2matching.3 @@ -1,4 +1,4 @@ -.TH PCRE2MATCHING 3 "19 January 2024" "PCRE2 10.43" +.TH PCRE2MATCHING 3 "30 August 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 MATCHING ALGORITHMS" @@ -7,7 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API) This document describes the two different algorithms that are available in PCRE2 for matching a compiled regular expression against a given subject string. The "standard" algorithm is the one provided by the \fBpcre2_match()\fP -function. This works in the same as Perl's matching function, and provide a +function. This works in the same way as Perl's matching function, and provides a Perl-compatible matching operation. The just-in-time (JIT) optimization that is described in the .\" HREF @@ -22,7 +22,7 @@ these are described below. .P When there is only one possible way in which a given subject string can match a pattern, the two algorithms give the same answer. A difference arises, however, -when there are multiple possibilities. For example, if the pattern +when there are multiple possibilities. For example, if the anchored pattern .sp ^<.*> .sp @@ -96,9 +96,9 @@ the output vector in decreasing order of length. There is an option to stop the algorithm after the first match (which is necessarily the shortest) is found.
.P Note that the size of vector needed to contain all the results depends on the -number of simultaneous matches, not on the number of parentheses in the -pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the match -data block is therefore not advisable when doing DFA matching. +number of simultaneous matches, not on the number of capturing parentheses in +the pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the +match data block is therefore not advisable when doing DFA matching. .P Note also that all the matches that are found start at the same point in the subject. If the pattern @@ -141,30 +141,34 @@ straightforward to keep track of captured substrings for the different matching possibilities, and PCRE2's implementation of this algorithm does not attempt to do this. This means that no captured substrings are available. .P -3. Because no substrings are captured, backreferences within the pattern are -not supported. -.P -4. For the same reason, conditional expressions that use a backreference as the -condition or test for a specific group recursion are not supported. -.P -5. Again for the same reason, script runs are not supported. +3. Because no substrings are captured, a number of related features are not +available: +.sp +(a) Backreferences; +.sp +(b) Conditional expressions that use a backreference as the condition or test +for a specific group recursion; +.sp +(c) Script runs; +.sp +(d) Scan substring assertions. .P -6. Because many paths through the tree may be active, the \eK escape sequence, +4. Because many paths through the tree may be active, the \eK escape sequence, which resets the start of the match when encountered (but may be on some paths and not on others), is not supported. .P -7. Callouts are supported, but the value of the \fIcapture_top\fP field is +5. Callouts are supported, but the value of the \fIcapture_top\fP field is always 1, and the value of the \fIcapture_last\fP field is always 0. .P -8. 
The \eC escape sequence, which (in the standard algorithm) always matches a -single code unit, even in a UTF mode, is not supported in these modes, because +6. The \eC escape sequence, which (in the standard algorithm) always matches a +single code unit, even in a UTF mode, is not supported in UTF modes because the alternative algorithm moves through the subject string one character (not code unit) at a time, for all active paths through the tree. .P -9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not +7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not supported. (*FAIL) is supported, and behaves like a failing negative assertion. .P -10. The PCRE2_MATCH_INVALID_UTF option for \fBpcre2_compile()\fP is not +8. The PCRE2_MATCH_INVALID_UTF option for \fBpcre2_compile()\fP is not supported by \fBpcre2_dfa_match()\fP. . . @@ -194,13 +198,15 @@ The alternative algorithm suffers from a number of disadvantages: because it has to search for all possible matches, but is also because it is less susceptible to optimization. .P -2. Capturing parentheses, backreferences, script runs, and matching within -invalid UTF string are not supported. +2. Capturing parentheses and other features such as backreferences that rely on +them are not supported. +.P +3. Matching within invalid UTF strings is not supported. .P -3. Although atomic groups are supported, their use does not provide the +4. Although atomic groups are supported, their use does not provide the performance advantage that it does for the standard algorithm. .P -4. JIT optimization is not supported. +5. JIT optimization is not supported. . . .SH AUTHOR @@ -217,6 +223,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 19 January 2024 +Last updated: 30 August 2024 Copyright (c) 1997-2024 University of Cambridge. 
.fi diff --git a/doc/pcre2partial.3 b/doc/pcre2partial.3 index cf8006d..8047fca 100644 --- a/doc/pcre2partial.3 +++ b/doc/pcre2partial.3 @@ -1,6 +1,6 @@ -.TH PCRE2PARTIAL 3 "04 September 2019" "PCRE2 10.34" +.TH PCRE2PARTIAL 3 "27 November 2024" "PCRE2 10.45-RC1" .SH NAME -PCRE2 - Perl-compatible regular expressions +PCRE2 - Perl-compatible regular expressions (revised API) .SH "PARTIAL MATCHING IN PCRE2" .rs .sp @@ -368,6 +368,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 04 September 2019 +Last updated: 27 November 2024 Copyright (c) 1997-2019 University of Cambridge. .fi diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 index c3ccb0b..c9cbf6a 100644 --- a/doc/pcre2pattern.3 +++ b/doc/pcre2pattern.3 @@ -1,4 +1,4 @@ -.TH PCRE2PATTERN 3 "04 June 2024" "PCRE2 10.44" +.TH PCRE2PATTERN 3 "27 November 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION DETAILS" @@ -10,9 +10,11 @@ are described in detail below. There is a quick-reference syntax summary in the \fBpcre2syntax\fP .\" page. PCRE2 tries to match Perl syntax and semantics as closely as it can. -PCRE2 also supports some alternative regular expression syntax (which does not -conflict with the Perl syntax) in order to provide some compatibility with -regular expressions in Python, .NET, and Oniguruma. +PCRE2 also supports some alternative regular expression syntax that does not +conflict with the Perl syntax in order to provide some compatibility with +regular expressions in Python, .NET, and Oniguruma. There are in addition some +options that enable alternative syntax and semantics that are not the same as +in Perl. .P Perl's regular expressions are described in its own documentation, and regular expressions in general are covered in a number of books, some of which have @@ -33,6 +35,23 @@ function, are discussed in the page. . . 
+.SH "EBCDIC CHARACTER CODES" +.rs +.sp +Most computers use ASCII or Unicode for encoding characters, and PCRE2 assumes +this by default. However, it can be compiled to run in an environment that uses +the EBCDIC code, which is the case for some IBM mainframe operating systems. In +the sections below, character code values are ASCII or Unicode; in an EBCDIC +environment these characters may have different code values, and there are no +code points greater than 255. Differences in behaviour when PCRE2 is running in +an EBCDIC environment are described in the section +.\" HTML +.\" +"EBCDIC environments" +.\" +below, which you can ignore unless you really are in an EBCDIC environment. +. +. .SH "SPECIAL START-OF-PATTERN ITEMS" .rs .sp @@ -104,7 +123,8 @@ of the subject. .rs .sp If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting -the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making quantifiers +the PCRE2_NO_AUTO_POSSESS option, or calling \fBpcre2_set_optimize()\fP with +a PCRE2_AUTO_POSSESS_OFF directive. This stops PCRE2 from making quantifiers possessive when what follows cannot match the repeated item. For example, by default a+b is treated as a++b. For more details, see the .\" HREF @@ -117,8 +137,9 @@ documentation. .rs .sp If a pattern starts with (*NO_START_OPT), it has the same effect as setting the -PCRE2_NO_START_OPTIMIZE option. This disables several optimizations for quickly -reaching "no match" results. For more details, see the +PCRE2_NO_START_OPTIMIZE option, or calling \fBpcre2_set_optimize()\fP with +a PCRE2_START_OPTIMIZE_OFF directive. This disables several optimizations for +quickly reaching "no match" results. For more details, see the .\" HREF \fBpcre2api\fP .\" @@ -129,7 +150,8 @@ documentation. .rs .sp If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect as -setting the PCRE2_NO_DOTSTAR_ANCHOR option. 
This disables optimizations that +setting the PCRE2_NO_DOTSTAR_ANCHOR option, or calling \fBpcre2_set_optimize()\fP +with a PCRE2_DOTSTAR_ANCHOR_OFF directive. This disables optimizations that apply to patterns whose top-level branches all start with .* (match any number of arbitrary characters). For more details, see the .\" HREF @@ -249,16 +271,6 @@ at compile time. This effect can also be achieved by starting a pattern with corresponding to PCRE2_BSR_UNICODE. . . -.SH "EBCDIC CHARACTER CODES" -.rs -.sp -PCRE2 can be compiled to run in an environment that uses EBCDIC as its -character code instead of ASCII or Unicode (typically a mainframe system). In -the sections below, character code values are ASCII or Unicode; in an EBCDIC -environment these characters may have different code values, and there are no -code points greater than 255. -. -. .SH "CHARACTERS AND METACHARACTERS" .rs .sp @@ -275,7 +287,10 @@ ASCII characters, K and S, that, in addition to their lower case ASCII equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) respectively when either PCRE2_UTF or PCRE2_UCP is set, unless the PCRE2_EXTRA_CASELESS_RESTRICT option is in force (either passed to -\fBpcre2_compile()\fP or set by (?r) within the pattern). +\fBpcre2_compile()\fP or set by (*CASELESS_RESTRICT) or (?r) within the +pattern). If the PCRE2_EXTRA_TURKISH_CASING option is in force (either passed +to \fBpcre2_compile()\fP or set by (*TURKISH_CASING) within the pattern), then +the 'i' letters are matched according to Turkish and Azeri languages. .P The power of regular expressions comes from the ability to include wild cards, character classes, alternatives, and repetitions in the pattern. 
These are @@ -320,7 +335,7 @@ a character class the only metacharacters are: .sp If a pattern is compiled with the PCRE2_EXTENDED option, most white space in the pattern, other than in a character class, within a \eQ...\eE sequence, or -between a # outside a character class and the next newline, inclusive, are +between a # outside a character class and the next newline, inclusive, is ignored. An escaping backslash can be used to include a white space or a # character as part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same applies, but in addition unescaped space and horizontal tab characters are @@ -381,6 +396,13 @@ by \eE later in the pattern, the literal interpretation continues to the end of the pattern (that is, \eE is assumed at the end). If the isolated \eQ is inside a character class, this causes an error, because the character class is then not terminated by a closing square bracket. +.P +Another difference from Perl is that any appearance of \eQ or \eE inside what +might otherwise be a quantifier causes PCRE2 not to recognize the sequence as a +quantifier. Perl recognizes a quantifier if (redundantly) either of the numbers +is inside \eQ...\eE, but not if the separating comma is. When not recognized as +a quantifier a sequence such as {\eQ1\eE,2} is treated as the literal string +"{1,2}". . . .\" HTML @@ -402,17 +424,32 @@ environment, these escapes are as follows: \er carriage return (hex 0D) (but see below) \et tab (hex 09) \e0dd character with octal code 0dd - \eddd character with octal code ddd, or backreference + \eddd character with octal code ddd, or back reference \eo{ddd..} character with octal code ddd.. \exhh character with hex code hh \ex{hhh..} character with hex code hhh.. \eN{U+hhh..} character with Unicode hex code point hhh.. .sp -By default, after \ex that is not followed by {, from zero to two hexadecimal -digits are read (letters can be in upper or lower case). 
Any number of -hexadecimal digits may appear between \ex{ and }. If a character other than a -hexadecimal digit appears between \ex{ and }, or if there is no terminating }, -an error occurs. +A description of how back references work is given +.\" HTML +.\" +later, +.\" +following the discussion of +.\" HTML +.\" +parenthesized groups. +.\" +.P +By default, after \ex that is not followed by {, one or two hexadecimal +digits are read (letters can be in upper or lower case). If the character that +follows \ex is neither { nor a hexadecimal digit, an error occurs. This is +different from Perl's default behaviour, which generates a NUL character, but +is in line with the behaviour of Perl's 'strict' mode in re. +.P +Any number of hexadecimal digits may appear between \ex{ and }. If a character +other than a hexadecimal digit appears between \ex{ and }, or if there is no +terminating }, an error occurs. .P Characters whose code points are less than 256 can be defined by either of the two syntaxes for \ex or by an octal sequence. There is no difference in the way @@ -452,67 +489,53 @@ lower case letter, it is converted to upper case. Then bit 6 of the character the code unit following \ec has a code point less than 32 or greater than 126, a compile-time error occurs. .P -When PCRE2 is compiled in EBCDIC mode, \eN{U+hhh..} is not supported. \ea, \ee, -\ef, \en, \er, and \et generate the appropriate EBCDIC code values. The \ec -escape is processed as specified for Perl in the \fBperlebcdic\fP document. The -only characters that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ], -^, _, or ?. Any other character provokes a compile-time error. The sequence -\ec@ encodes character code 0; after \ec the letters (in either case) encode -characters 1-26 (hex 01 to hex 1A); [, \e, ], ^, and _ encode characters 27-31 -(hex 1B to hex 1F), and \ec? becomes either 255 (hex FF) or 95 (hex 5F). 
-.P -Thus, apart from \ec?, these escapes generate the same character code values as -they do in an ASCII environment, though the meanings of the values mostly -differ. For example, \ecG always generates code value 7, which is BEL in ASCII -but DEL in EBCDIC. -.P -The sequence \ec? generates DEL (127, hex 7F) in an ASCII environment, but -because 127 is not a control character in EBCDIC, Perl makes it generate the -APC character. Unfortunately, there are several variants of EBCDIC. In most of -them the APC character has the value 255 (hex FF), but in the one Perl calls -POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC -values, PCRE2 makes \ec? generate 95; otherwise it generates 255. -.P -After \e0 up to two further octal digits are read. If there are fewer than two -digits, just those that are present are used. Thus the sequence \e0\ex\e015 -specifies two binary zeros followed by a CR character (code value 13). Make -sure you supply two digits after the initial zero if the pattern character that -follows is itself an octal digit. -.P +For differences in the way some escapes behave in EBCDIC environments, +see section +.\" HTML +.\" +"EBCDIC environments" +.\" +below. +. +. +.SS "Octal escapes and back references" +.rs +.sp The escape \eo must be followed by a sequence of octal digits, enclosed in -braces. An error occurs if this is not the case. This escape is a recent -addition to Perl; it provides way of specifying character code points as octal -numbers greater than 0777, and it also allows octal numbers and backreferences -to be unambiguously specified. +braces. An error occurs if this is not the case. This escape provides a way of +specifying character code points as octal numbers greater than 0777, and it +also allows octal numbers and backreferences to be unambiguously distinguished. +.P +If braces are not used, after \e0 up to two further octal digits are read. 
+However, if the PCRE2_EXTRA_NO_BS0 option is set, at least one more octal digit +must follow \e0 (use \e00 to generate a NUL character). Make sure you supply +two digits after the initial zero if the pattern character that follows is +itself an octal digit. +.P +Inside a character class, when a backslash is followed by any octal digit, up +to three octal digits are read to generate a code point. Any subsequent digits +stand for themselves. The sequences \e8 and \e9 are treated as the literal +characters "8" and "9". +.P +Outside a character class, Perl's handling of a backslash followed by a digit +other than 0 is complicated by ambiguity, and Perl has changed over time, +causing PCRE2 also to change. From PCRE2 release 10.45 there is an option +called PCRE2_EXTRA_PYTHON_OCTAL that causes PCRE2 to use Python's unambiguous +rules. The next two subsections describe the two sets of rules. .P For greater clarity and unambiguity, it is best to avoid following \e by a digit greater than zero. Instead, use \eo{...} or \ex{...} to specify numerical -character code points, and \eg{...} to specify backreferences. The following -paragraphs describe the old, ambiguous syntax. -.P -The handling of a backslash followed by a digit other than 0 is complicated, -and Perl has changed over time, causing PCRE2 also to change. -.P -Outside a character class, PCRE2 reads the digit and any following digits as a -decimal number. If the number is less than 10, begins with the digit 8 or 9, or -if there are at least that many previous capture groups in the expression, the -entire sequence is taken as a \fIbackreference\fP. A description of how this -works is given -.\" HTML -.\" -later, -.\" -following the discussion of -.\" HTML -.\" -parenthesized groups. -.\" -Otherwise, up to three octal digits are read to form a character code. 
-.P -Inside a character class, PCRE2 handles \e8 and \e9 as the literal characters -"8" and "9", and otherwise reads up to three octal digits following the -backslash, using them to generate a data character. Any subsequent digits stand -for themselves. For example, outside a character class: +character code points, and \eg{...} to specify backreferences. +. +. +.SS "Perl rules for non-class backslash 1-9" +.rs +.sp +All the digits that follow the backslash are read as a decimal number. If the +number is less than 10, begins with the digit 8 or 9, or if there are at least +that many previous capture groups in the expression, the entire sequence is +taken as a back reference. Otherwise, up to three octal digits are read to form +a character code. For example: .sp \e040 is another way of writing an ASCII space .\" JOIN @@ -537,6 +560,19 @@ must not be introduced by a leading zero, because no more than three octal digits are ever read. . . +.SS "Python rules for non-class backslash 1-9" +.rs +.sp +If there are at least three octal digits after the backslash, exactly three are +read as an octal code point number, but the value must be no greater than +\e377, even in modes where higher code point values are supported. Any +subsequent digits stand for themselves. If there are fewer than three octal +digits, the sequence is taken as a decimal back reference. Thus, for example, +\e12 is always a back reference, independent of how many captures there are in +the pattern. An error is generated for a reference to a non-existent capturing +group. +. +. .SS "Constraints on character values" .rs .sp @@ -804,7 +840,7 @@ When PCRE2 is built with Unicode support (the default), three additional escape sequences that match characters with specific properties are available. They can be used in any mode, though in 8-bit and 16-bit non-UTF modes these sequences are of course limited to testing characters whose code points are -less than U+0100 and U+10000, respectively.
In 32-bit non-UTF mode, code points +less than U+0100 or U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all treated as being in the Unknown script and with an unassigned type. .P @@ -820,12 +856,31 @@ The extra escape sequences that provide property support are: \eP{\fIxx\fP} a character without the \fIxx\fP property \eX a Unicode extended grapheme cluster .sp -The property names represented by \fIxx\fP above are not case-sensitive, and in -accordance with Unicode's "loose matching" rules, spaces, hyphens, and -underscores are ignored. There is support for Unicode script names, Unicode -general category properties, "Any", which matches any character (including -newline), Bidi_Class, a number of binary (yes/no) properties, and some special -PCRE2 properties (described +For compatibility with Perl, negation can be specified by including a +circumflex between the opening brace and the property. For example, \ep{^Lu} is +the same as \eP{Lu}. +.P +In accordance with Unicode's "loose matching" rules, ASCII white space +characters, hyphens, and underscores are ignored in the properties represented +by \fIxx\fP above. As well as the space character, ASCII white space can be +tab, linefeed, vertical tab, formfeed, or carriage return. +.P +Some properties are specified as a name only; others as a name and a value, +separated by a colon or an equals sign. The names and values consist of ASCII +letters and digits (with one Perl-specific exception, see below). They are not +case sensitive. Note, however, that the escapes themselves, \ep and \eP, +\fIare\fP case sensitive. There are abbreviations for many names. 
The following +examples are all equivalent: +.sp + \ep{bidiclass=al} + \ep{BC=al} + \ep{ Bidi_Class : AL } + \ep{ Bi-di class = Al } + \eP{ ^ Bi-di class = Al } +.sp +There is support for Unicode script names, Unicode general category properties, +"Any", which matches any character (including newline), Bidi_Class, a number of +binary (yes/no) properties, and some special PCRE2 properties (described .\" HTML .\" below). @@ -845,10 +900,11 @@ Extensions") with which it is commonly used. Using the Adlam script as an example, \ep{sc:Adlam} matches characters whose basic script is Adlam, whereas \ep{scx:Adlam} matches, in addition, characters that have Adlam in their extensions list. The full names "script" and "script extensions" for the -property types are recognized, and a equals sign is an alternative to the -colon. If a script name is given without a property type, for example, -\ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl changed to this -interpretation at release 5.26 and PCRE2 changed at release 10.40. +property types are recognized and, as for all property specifications, an +equals sign is an alternative to the colon. If a script name is given without a +property type, for example, \ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl +changed to this interpretation at release 5.26 and PCRE2 changed at release +10.40. .P Unassigned characters (and in non-UTF 32-bit mode, characters with code points greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not @@ -865,14 +921,10 @@ by running this command: .rs .sp Each character has exactly one Unicode general category property, specified by -a two-letter abbreviation. For compatibility with Perl, negation can be -specified by including a circumflex between the opening brace and the property -name. For example, \ep{^Lu} is the same as \eP{Lu}. -.P -If only one letter is specified with \ep or \eP, it includes all the general -category properties that start with that letter. 
In this case, in the absence -of negation, the curly brackets in the escape sequence are optional; these two -examples have the same effect: +a two-letter abbreviation. If only one letter is specified with \ep or \eP, it +includes all the general category properties that start with that letter. In +this case, in the absence of negation, the curly brackets in the escape +sequence are optional; these two examples have the same effect: .sp \ep{L} \epL @@ -887,6 +939,7 @@ The following general category property codes are supported: Cs Surrogate .sp L Letter + Lc Cased letter Ll Lower case letter Lm Modifier letter Lo Other letter @@ -923,9 +976,13 @@ The following general category property codes are supported: Zp Paragraph separator Zs Space separator .sp -The special property LC, which has the synonym L&, is also supported: it -matches a character that has the Lu, Ll, or Lt property, in other words, a -letter that is not classified as a modifier or "other". +Perl originally used the name L& for the Lc property. This is still supported +by Perl, but discouraged. PCRE2 also still supports it. This property matches +any character that has the Lu, Ll, or Lt property, in other words, any letter +that is not classified as a modifier or "other". From release 10.45 of PCRE2 +the properties Lu, Ll, and Lt are all treated as Lc when case-independent +matching is set by the PCRE2_CASELESS option or (?i) within the pattern. The +other properties are not affected by caseless matching. .P The Cs (Surrogate) property applies only to characters whose code points are in the range U+D800 to U+DFFF. These characters are no different to any other @@ -945,10 +1002,6 @@ properties with "Is". No character that is in the Unicode table has the Cn (unassigned) property. Instead, this property is assumed for any code point that is not in the Unicode table. -.P -Specifying caseless matching does not affect these escape sequences. For -example, \ep{Lu} always matches only upper case letters. 
This is different from -the behaviour of current versions of Perl. . . .SS "Binary (yes/no) properties for \ep and \eP" @@ -992,10 +1045,11 @@ The recognized classes are: RLI right-to-left isolate RLO right-to-left override S segment separator - WS which space + WS white space .sp -An equals sign may be used instead of a colon. The class names are -case-insensitive; only the short names listed above are recognized. +As in all property specifications, an equals sign may be used instead of a +colon and the class names are case-insensitive. Only the short names listed +above are recognized; PCRE2 does not at present support any long alternatives. . . .SS Extended grapheme clusters @@ -1063,11 +1117,11 @@ explicitly. These properties are: .sp Xan matches characters that have either the L (letter) or the N (number) property. Xps matches the characters tab, linefeed, vertical tab, form feed, or -carriage return, and any other character that has the Z (separator) property. -Xsp is the same as Xps; in PCRE1 it used to exclude vertical tab, for Perl -compatibility, but Perl changed. Xwd matches the same characters as Xan, plus -those that match Mn (non-spacing mark) or Pc (connector punctuation, which -includes underscore). +carriage return, and any other character that has the Z (separator) property +(this includes the space character). Xsp is the same as Xps; in PCRE1 it used +to exclude vertical tab, for Perl compatibility, but Perl changed. Xwd matches +the same characters as Xan, plus those that match Mn (non-spacing mark) or Pc +(connector punctuation, which includes underscore). .P There is another non-standard property, Xuc, which matches any character that can be represented by a Universal Character Name in C++ and other programming @@ -1391,13 +1445,12 @@ subject character must not be in the set defined by the class. If a circumflex is actually required as a member of the class, ensure it is not the first character, or escape it with a backslash. 
.P -For example, the character class [aeiou] matches any lower case vowel, while -[^aeiou] matches any character that is not a lower case vowel. Note that a -circumflex is just a convenient notation for specifying the characters that -are in the class by enumerating those that are not. A class that starts with a -circumflex is not an assertion; it still consumes a character from the subject -string, and therefore it fails if the current pointer is at the end of the -string. +For example, the character class [aeiou] matches any lower case English vowel, +whereas [^aeiou] matches all other characters. Note that a circumflex is just a +convenient notation for specifying the characters that are in the class by +enumerating those that are not. A class that starts with a circumflex is not an +assertion; it still consumes a character from the subject string, and therefore +it fails to match if the current pointer is at the end of the string. .P Characters in a class may be specified by their code points using \eo, \ex, or \eN{U+hh..} in the usual way. When caseless matching is set, any letters in a @@ -1406,7 +1459,10 @@ a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a caseful version would. Note that there are two ASCII characters, K and S, that, in addition to their lower case ASCII equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) -respectively when either PCRE2_UTF or PCRE2_UCP is set. +respectively when either PCRE2_UTF or PCRE2_UCP is set. If you do not want +these ASCII/non-ASCII case equivalences, you can suppress them by setting +PCRE2_EXTRA_CASELESS_RESTRICT, either as an option in a compile context, or by +including (*CASELESS_RESTRICT) or (?r) within a pattern. 
.P Characters that might indicate line breaks are never treated in any special way when matching character classes, whatever line-ending sequence is in use, and @@ -1437,6 +1493,14 @@ indicating a range, typically as the first or last character in the class, or immediately after a range. For example, [b-d-z] matches letters in the range b to d, a hyphen character, or z. .P +There is some special treatment for alphabetic ranges in EBCDIC environments; +see the section +.\" HTML +.\" +"EBCDIC environments" +.\" +below. +.P Perl treats a hyphen as a literal if it appears before or after a POSIX class (see below) or before or after a character type escape such as \ed or \eH. However, unless the hyphen is the last character in the class, Perl outputs a @@ -1447,9 +1511,9 @@ It is not possible to have the literal character "]" as the end character of a range. A pattern such as [W-]46] is interpreted as a class of two characters ("W" and "-") followed by a literal string "46]", so it would match "W46]" or "-46]". However, if the "]" is escaped with a backslash it is interpreted as -the end of range, so [W-\e]46] is interpreted as a class containing a range -followed by two other characters. The octal or hexadecimal representation of -"]" can also be used to end a range. +the end of a range, so [W-\e]46] is interpreted as a class containing a range +and two other characters. The octal or hexadecimal representation of "]" can +also be used to end a range. .P Ranges normally include all code points between the start and end characters, inclusive. They can also be used for code points specified numerically, for @@ -1460,14 +1524,6 @@ explicitly by default (the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). However, ranges such as [\ex{d7ff}-\ex{e000}], which include the surrogates, are always permitted. .P -There is a special case in EBCDIC environments for ranges whose end points are -both specified as literal letters in the same case. 
For compatibility with -Perl, EBCDIC code points within the range that are not letters are omitted. For -example, [h-k] matches only four characters, even though the codes for h and k -are 0x88 and 0x92, a range of 11 code points. However, if the range is -specified numerically, for example, [\ex88-\ex92] or [h-\ex92], all code points -are included. -.P If a range that includes letters is used when caseless matching is set, it matches the letters in either case. For example, [W-c] is equivalent to [][\e\e^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character @@ -1481,20 +1537,132 @@ whereas [\ew] includes underscore. A positive character class should be read as "something OR something OR ..." and a negative class as "NOT something AND NOT something AND NOT ...". .P -The only metacharacters that are recognized in character classes are backslash, -hyphen (only where it can be interpreted as specifying a range), circumflex -(only at the start), opening square bracket (only when it can be interpreted as -introducing a POSIX class name, or for a special compatibility feature - see -the next two sections), and the terminating closing square bracket. However, -escaping other non-alphanumeric characters does no harm. +The metacharacters that are recognized in character classes are backslash, +hyphen (when it can be interpreted as specifying a range), circumflex +(only at the start), and the terminating closing square bracket. An opening +square bracket is also special when it can be interpreted as introducing a +POSIX class (see +.\" HTML +.\" +"Posix character classes" +.\" +below), or a special compatibility feature (see +.\" HTML +.\" +"Compatibility feature for word boundaries" +.\" +below. Escaping any non-alphanumeric character in a class turns it into a +literal, whether or not it would otherwise be a metacharacter. . . +.SH "PERL EXTENDED CHARACTER CLASSES" +.rs +.sp +From release 10.45 PCRE2 supports Perl's (?[...]) extended character class +syntax. 
This can be used to perform set operations such as intersection on +character classes. +.P +The syntax permitted within (?[...]) is quite different to ordinary character +classes. Inside the extended class, there is an expression syntax consisting of +"atoms", operators, and ordinary parentheses "()" used for grouping. Such +classes always have the Perl /xx modifier (PCRE2 option PCRE2_EXTENDED_MORE) +turned on within them. This means that literal space and tab characters are +ignored everywhere in the class. +.P +The allowed atoms are individual characters specified by escape sequences such +as \en or \ex{123}, character types such as \ed, POSIX classes such as +[:alpha:], and nested ordinary (non-extended) character classes. For example, +in (?[\ed & [...]]) the nested class [...] follows the usual rules for ordinary +character classes, in which parentheses are not metacharacters, and character +literals and ranges are permitted. +.P +Character literals and ranges may not appear outside a nested ordinary +character class because they are not atoms in the extended syntax. The extended +syntax does not introduce any additional escape sequences, so (?[\ey]) is an +unknown escape, as it would be in [\ey]. +.P +In the extended syntax, ^ does not negate a class (except within an +ordinary class nested inside an extended class); it is instead a binary +operator. +.P +The binary operators are "&" (intersection), "|" or "+" (union), "-" +(subtraction) and "^" (symmetric difference). These are left-associative and +"&" has higher (tighter) precedence, while the others have equal lower +precedence. The one prefix unary operator is "!" (complement), with highest +precedence. +. +. +.SH "UTS#18 EXTENDED CHARACTER CLASSES" +.rs +.sp +The PCRE2_ALT_EXTENDED_CLASS option enables an alternative to Perl's (?[...]) +syntax, allowing instead extended class behaviour inside ordinary [...] +character classes. This altered syntax for [...] 
classes is loosely described +by the Unicode standard UTS#18. The PCRE2_ALT_EXTENDED_CLASS option does not +prevent use of (?[...]) classes; it just changes the meaning of all +[...] classes that are not nested inside a Perl (?[...]) class. +.P +Firstly, in ordinary Perl [...] syntax, an expression such as "[a[]" is a +character class with two literal characters "a" and "[", but in UTS#18 extended +classes the "[" character becomes an additional metacharacter within classes, +denoting the start of a nested class, so a literal "[" must be escaped as "\e[". +.P +Secondly, within the UTS#18 extended syntax, there are operators "||", "&&", +"--" and "~~" which denote character class union, intersection, subtraction, +and symmetric difference respectively. In standard Perl syntax, these would +simply be needlessly-repeated literals (except for "--" which could be the +start or end of a range). In UTS#18 extended classes these operators can be used +in constructs such as [\ep{L}--[QW]] for "Unicode letters, other than Q and W". +A literal "-" at the start or end of a range must be escaped, so while "[--1]" +in Perl syntax is the range from hyphen to "1", it must be escaped as "[\e--1]" +in UTS#18 extended classes. +.P +Unlike Perl's (?[...]) extended classes, the PCRE2_EXTENDED_MORE option to +ignore space and tab characters is not automatically enabled for UTS#18 +extended classes, but it is honoured if set. +.P +Extended UTS#18 classes can be nested, and nested classes are themselves +extended classes (unlike Perl, where nested classes must be simple classes). +For example, [\ep{L}&&[\ep{Thai}||\ep{Greek}]] matches any letter that is in +the Thai or Greek scripts. Note that this means that no special grouping +characters (such as the parentheses used in Perl's (?[...]) class syntax) are +needed. +.P +Individual class items (literal characters, literal ranges, properties such as +\ed or \ep{...}, and nested classes) can be combined by juxtaposition or by an +operator. 
Juxtaposition is the implicit union operator, and binds more tightly +than any explicit operator. Thus a sequence of literals and/or ranges behaves +as if it is enclosed in square brackets. For example, [A-Z0-9&&[^E8]] is the +same as [[A-Z0-9]&&[^E8]], which matches any upper case alphanumeric character +except "E" or "8". +.P +Precedence between the explicit operators is not defined, so mixing operators +is a syntax error. For example, [A&&B--C] is an error, but [A&&[B--C]] is +valid. +.P +This is an emerging syntax which is being adopted gradually across the regex +ecosystem: for example JavaScript adopted the "/v" flag in ECMAScript 2024; +Python's "re" module reserves the syntax for future use with a FutureWarning +for unescaped use of "[" as a literal within character classes. Due to UTS#18 +providing insufficient guidance, engines interpret the syntax differently. +Rust's "regex" crate and Python's "regex" PyPI module both implement UTS#18 +extended classes, but with slight incompatibilities ([A||B&&C] is parsed as +[A||[B&&C]] in Python's "regex" but as [[A||B]&&C] in Rust's "regex"). +.P +PCRE2's syntax adds syntax restrictions similar to ECMAScript's /v flag, so +that all the UTS#18 extended classes accepted as valid by PCRE2 have the +property that they are interpreted either with the same behaviour, or as +invalid, by all other major engines. Please file an issue if you are aware of +cross-engine differences in behaviour between PCRE2 and another major engine. +. +. +.\" HTML .SH "POSIX CHARACTER CLASSES" .rs .sp Perl supports the POSIX notation for character classes. This uses names enclosed by [: and :] within the enclosing square brackets. PCRE2 also supports -this notation. For example, +this notation, in both ordinary and extended classes. For example, .sp [01[:alpha:]%] .sp @@ -1573,7 +1741,7 @@ property.
[:xdigit:] In addition to the ASCII hexadecimal digits, this also matches the "fullwidth" versions of those characters, whose Unicode code points start at U+FF10. This -is a change that was made in PCRE release 10.43 for Perl compatibility. +is a change that was made in PCRE2 release 10.43 for Perl compatibility. .P The other POSIX classes are unchanged by PCRE2_UCP, and match only characters with code points less than 256. @@ -1586,6 +1754,7 @@ for all POSIX classes, including [:digit:] and [:xdigit:]. Within a pattern, (?aP) and (?-aP) set and unset both these options for consistency. . . +.\" HTML .SH "COMPATIBILITY FEATURE FOR WORD BOUNDARIES" .rs .sp @@ -2124,8 +2293,9 @@ one succeeds. Consider this pattern: (?>.*?a)b .sp It matches "ab" in the subject "aab". The use of the backtracking control verbs -(*PRUNE) and (*SKIP) also disable this optimization, and there is an option, -PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly. +(*PRUNE) and (*SKIP) also disable this optimization. To do so explicitly, +either pass the compile option PCRE2_NO_DOTSTAR_ANCHOR, or call +\fBpcre2_set_optimize()\fP with a PCRE2_DOTSTAR_ANCHOR_OFF directive. .P When a capture group is repeated, the value captured is the substring that matched the final iteration. For example, after @@ -2217,8 +2387,9 @@ package, and PCRE1 copied it from there. It found its way into Perl at release PCRE2 has an optimization that automatically "possessifies" certain simple pattern constructs. For example, the sequence A+B is treated as A++B because there is no point in backtracking into a sequence of A's when B must follow. -This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting -the pattern with (*NO_AUTO_POSSESS). +This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, by calling +\fBpcre2_set_optimize()\fP with a PCRE2_AUTO_POSSESS_OFF directive, or by +starting the pattern with (*NO_AUTO_POSSESS). 
.P When a pattern contains an unlimited repeat inside a group that can itself be repeated an unlimited number of times, the use of an atomic group is the only @@ -2394,22 +2565,32 @@ as normal. .SH ASSERTIONS .rs .sp -An assertion is a test on the characters following or preceding the current -matching point that does not consume any characters. The simple assertions -coded as \eb, \eB, \eA, \eG, \eZ, \ez, ^ and $ are described +An assertion is a test that does not consume any characters. The test must +succeed for the match to continue. The simple assertions coded as \eb, \eB, +\eA, \eG, \eZ, \ez, ^ and $ are described .\" HTML .\" above. .\" .P -More complicated assertions are coded as parenthesized groups. There are two -kinds: those that look ahead of the current position in the subject string, and -those that look behind it, and in each case an assertion may be positive (must -match for the assertion to be true) or negative (must not match for the -assertion to be true). An assertion group is matched in the normal way, -and if it is true, matching continues after it, but with the matching position +More complicated assertions are coded as parenthesized groups. If matching such +a group succeeds, matching continues after it, but with the matching position in the subject string reset to what it was before the assertion was processed. .P +A special kind of assertion, called a "scan substring" assertion, matches a +subpattern against a previously captured substring. This is described in the +section entitled +.\" HTML +.\" +"Scan substring assertions" +.\" +below. It is a PCRE2 extension, not compatible with Perl. +.P +The other group-based assertions are of two kinds: those that look ahead of the +current position in the subject string, and those that look behind it, and in +each case an assertion may be positive (must match for the assertion to be +true) or negative (must not match for the assertion to be true).
+.P The Perl-compatible lookaround assertions are atomic. If an assertion is true, but there is a subsequent matching failure, there is no backtracking into the assertion. However, there are some cases where non-atomic assertions can be @@ -2701,6 +2882,65 @@ conditional groups (see below) must be atomic. . . +.\" HTML +.SH "SCAN SUBSTRING ASSERTIONS" +.rs +.sp +A special kind of assertion, not compatible with Perl, makes it possible to +check the contents of a captured substring by matching it with a subpattern. +Because this involves capturing, this feature is not supported by +\fBpcre2_dfa_match()\fP. +.P +A scan substring assertion starts with the sequence (*scan_substring: or +(*scs: which is followed by a list of substring numbers (absolute or relative) +and/or substring names enclosed in single quotes or angle brackets, all within +parentheses. The rest of the item is the subpattern that is applied to the +substring, as shown in these examples: +.sp + (*scan_substring:(1)...) + (*scs:(-2)...) + (*scs:('AB')...) + (*scs:(1,'AB',-2)...) +.sp +The list of groups is checked in the order they are given, and it is the +contents of the first one that is found to be set that are scanned. When +PCRE2_DUPNAMES is set and there are ambiguous group names, all groups with the +same name are checked in numerical order. A scan substring assertion fails if +none of the groups it references have been set. +.P +The pattern match on the substring is always anchored, that is, it must match +from the start of the substring. There is no "bumpalong" if it does not match +at the start. The end of the subject is temporarily reset to be the end of the +substring, so \eZ, \ez, and $ will match there. However, the start of the +subject is \fInot\fP reset. This means that ^ matches only if the substring is +actually at the start of the main subject, but it also means that lookbehind +assertions into what precedes the substring are possible. 
+.P +Here is a very simple example: find a word that contains the rare (in English) +sequence of letters "rh" not at the start: +.sp + \eb(\ew++)(*scs:(1).+rh) +.sp +The first group captures a word which is then scanned by the second group. +This example does not actually need this heavyweight feature; the same match +can be achieved with: +.sp + \eb\ew+?rh\ew*\eb +.sp +When things are more complicated, however, scanning a captured substring can be +a useful way to describe the required match. For example, there is a rather +complicated pattern in the PCRE2 test data that checks an entire subject string +for a palindrome, that is, the sequence of letters is the same in both +directions. Suppose you want to search for individual words of two or more +characters such as "level" that are palindromes: +.sp + (\eb\ew{2,}+\eb)(*scs:(1)...palindrome-matching-pattern...) +.sp +Within a substring scanning subpattern, references to other groups work as +normal. Capturing groups may appear, and will retain their values during +ongoing matching if the assertion succeeds. +. +. .SH "SCRIPT RUNS" .rs .sp @@ -2986,8 +3226,8 @@ for which captures are retained only for positive assertions that succeed.) There are two ways of including comments in patterns that are processed by PCRE2. In both cases, the start of the comment must not be in a character class, nor in the middle of any other sequence of related characters such as -(?: or a group name or number. The characters that make up a comment play -no part in the pattern matching. +(?: or a group name or number or a Unicode property name. The characters that +make up a comment play no part in the pattern matching. .P The sequence (?# marks the start of a comment that continues up to the next closing parenthesis. Nested parentheses are not permitted. If the @@ -3289,7 +3529,9 @@ code. The feature is called "callout".
The caller of PCRE2 provides an external function by putting its entry point in a match context using the function \fBpcre2_set_callout()\fP, and then passing that context to \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout -entry point is set to NULL, callouts are disabled. +entry point is set to NULL, callout points will be passed over silently during +matching. To disallow callouts in the pattern syntax, you may use the +PCRE2_EXTRA_NEVER_CALLOUT option. .P Within a regular expression, (?C) indicates a point at which the external function is to be called. There are two kinds of callout: those with a @@ -3389,8 +3631,8 @@ not there. Any number of these verbs may occur in a pattern. Except for .P Since these verbs are specifically related to backtracking, most of them can be used only when the pattern is to be matched using the traditional matching -function, because that uses a backtracking algorithm. With the exception of -(*FAIL), which behaves like a failing negative assertion, the backtracking +function or JIT, because they use backtracking algorithms. With the exception +of (*FAIL), which behaves like a failing negative assertion, the backtracking control verbs cause an error if encountered by the DFA matching function. .P The behaviour of these verbs in @@ -3420,7 +3662,8 @@ minimum length of matching subject, or that a particular character must be present. When one of these optimizations bypasses the running of a match, any included backtracking verbs will not, of course, be processed. You can suppress the start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option -when calling \fBpcre2_compile()\fP, or by starting the pattern with +when calling \fBpcre2_compile()\fP, by calling \fBpcre2_set_optimize()\fP with a +PCRE2_START_OPTIMIZE_OFF directive, or by starting the pattern with (*NO_START_OPT). 
There is more discussion of this option in the section entitled .\" HTML @@ -3554,7 +3797,8 @@ attempts starting at "P" and then with an empty string do not get as far as the (*MARK) item, but nevertheless do not reset it. .P If you are interested in (*MARK) values after failed matches, you should -probably set the PCRE2_NO_START_OPTIMIZE option +probably either set the PCRE2_NO_START_OPTIMIZE option or call +\fBpcre2_set_optimize()\fP with a PCRE2_START_OPTIMIZE_OFF directive .\" HTML .\" (see above) @@ -3569,9 +3813,9 @@ The following verbs do nothing when they are encountered. Matching continues with what follows, but if there is a subsequent match failure, causing a backtrack to the verb, a failure is forced. That is, backtracking cannot pass to the left of the verb. However, when one of these verbs appears inside an -atomic group or in a lookaround assertion that is true, its effect is confined -to that group, because once the group has been matched, there is never any -backtracking into it. Backtracking from beyond an assertion or an atomic group +atomic group or in an atomic lookaround assertion that is true, its effect is +confined to that group, because once the group has been matched, there is never +any backtracking into it. Backtracking from beyond an atomic assertion or group ignores the entire group, and seeks a preceding backtracking point. .P These verbs differ in exactly what kind of failure occurs when backtracking @@ -3823,12 +4067,17 @@ backtrack that occurs after such an assertion is complete does not jump back into the assertion. Note in particular that a (*MARK) name that is set in an assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern. 
.P -PCRE2 now supports non-atomic positive assertions, as described in the section -entitled +PCRE2 now supports non-atomic positive assertions and also "scan substring" +assertions, as described in the sections entitled .\" HTML .\" "Non-atomic assertions" .\" +and +.\" HTML +.\" +"Scan substring assertions" +.\" above. These assertions must be standalone (not used as conditions). They are not Perl-compatible. For these assertions, a later backtrack does jump back into the assertion, and therefore verbs such as (*COMMIT) can be triggered by @@ -3836,7 +4085,8 @@ backtracks from later in the pattern. .P The effect of (*THEN) is not allowed to escape beyond an assertion. If there are no more branches to try, (*THEN) causes a positive assertion to be false, -and a negative assertion to be true. +and a negative assertion to be true. This behaviour differs from Perl when the +assertion has only one branch. .P The other backtracking verbs are not treated specially if they appear in a standalone positive assertion. In a conditional positive assertion, @@ -3870,6 +4120,51 @@ is no such group within the subroutine's group, the subroutine match fails and there is a backtrack at the outer level. . . +.\" HTML +.SH "EBCDIC ENVIRONMENTS" +.rs +.sp +Differences in the way PCRE2 behaves when it is running in an EBCDIC environment +are covered in this section. +. +. +.SS "Escape sequences" +.rs +.sp +When PCRE2 is compiled in EBCDIC mode, \eN{U+hhh..} is not supported. \ea, \ee, +\ef, \en, \er, and \et generate the appropriate EBCDIC code values. The \ec +escape is processed as specified for Perl in the \fBperlebcdic\fP document. The +only characters that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ], +^, _, or ?. Any other character provokes a compile-time error. The sequence +\ec@ encodes character code 0; after \ec the letters (in either case) encode +characters 1-26 (hex 01 to hex 1A); [, \e, ], ^, and _ encode characters 27-31 +(hex 1B to hex 1F), and \ec?
becomes either 255 (hex FF) or 95 (hex 5F). +.P +Thus, apart from \ec?, these escapes generate the same character code values as +they do in an ASCII or Unicode environment, though the meanings of the values +mostly differ. For example, \ecG always generates code value 7, which is BEL in +ASCII but DEL in EBCDIC. +.P +The sequence \ec? generates DEL (127, hex 7F) in an ASCII environment, but +because 127 is not a control character in EBCDIC, Perl makes it generate the +APC character. Unfortunately, there are several variants of EBCDIC. In most of +them the APC character has the value 255 (hex FF), but in the one Perl calls +POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC +values, PCRE2 makes \ec? generate 95; otherwise it generates 255. +. +. +.SS "Character classes" +.rs +.sp +In character classes there is a special case in EBCDIC environments for ranges +whose end points are both specified as literal letters in the same case. For +compatibility with Perl, EBCDIC code points within the range that are not +letters are omitted. For example, [h-k] matches only four characters, even +though the EBCDIC codes for h and k are 0x88 and 0x92, a range of 11 code +points. However, if the range is specified numerically, for example, +[\ex88-\ex92] or [h-\ex92], all code points are included. +. +. .SH "SEE ALSO" .rs .sp @@ -3891,6 +4186,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 04 June 2024 +Last updated: 27 November 2024 Copyright (c) 1997-2024 University of Cambridge. .fi diff --git a/doc/pcre2perform.3 b/doc/pcre2perform.3 index 72aa67a..95d74f9 100644 --- a/doc/pcre2perform.3 +++ b/doc/pcre2perform.3 @@ -1,4 +1,4 @@ -.TH PCRE2PERFORM 3 "27 July 2022" "PCRE2 10.41" +.TH PCRE2PERFORM 3 "06 December 2022" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 PERFORMANCE" @@ -255,6 +255,6 @@ Cambridge, England. 
.rs .sp .nf -Last updated: 27 July 2022 +Last updated: 06 December 2022 Copyright (c) 1997-2022 University of Cambridge. .fi diff --git a/doc/pcre2posix.3 b/doc/pcre2posix.3 index 3709299..506ca71 100644 --- a/doc/pcre2posix.3 +++ b/doc/pcre2posix.3 @@ -1,4 +1,4 @@ -.TH PCRE2POSIX 3 "19 January 2024" "PCRE2 10.43" +.TH PCRE2POSIX 3 "27 November 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "SYNOPSIS" @@ -143,7 +143,7 @@ REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard. .sp When a pattern that is compiled with this flag is passed to \fBpcre2_regexec()\fP for matching, the \fInmatch\fP and \fIpmatch\fP arguments -are ignored, and no captured strings are returned. Versions of the PCRE library +are ignored, and no captured strings are returned. Versions of the PCRE2 library prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens because it disables the use of backreferences. .sp @@ -343,6 +343,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 19 January 2024 +Last updated: 27 November 2024 Copyright (c) 1997-2024 University of Cambridge. .fi diff --git a/doc/pcre2sample.3 b/doc/pcre2sample.3 index 1e93ba4..bfadb21 100644 --- a/doc/pcre2sample.3 +++ b/doc/pcre2sample.3 @@ -1,4 +1,4 @@ -.TH PCRE2SAMPLE 3 "02 February 2016" "PCRE2 10.22" +.TH PCRE2SAMPLE 3 "14 November 2023" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 SAMPLE PROGRAM" @@ -94,6 +94,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 02 February 2016 +Last updated: 14 November 2023 Copyright (c) 1997-2016 University of Cambridge. 
.fi diff --git a/doc/pcre2serialize.3 b/doc/pcre2serialize.3 index 6fe2c64..59e98cb 100644 --- a/doc/pcre2serialize.3 +++ b/doc/pcre2serialize.3 @@ -1,4 +1,4 @@ -.TH PCRE2SERIALIZE 3 "27 June 2018" "PCRE2 10.32" +.TH PCRE2SERIALIZE 3 "19 January 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS" @@ -193,6 +193,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 27 June 2018 +Last updated: 19 January 2024 Copyright (c) 1997-2018 University of Cambridge. .fi diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 index 6f4f7aa..70cf82c 100644 --- a/doc/pcre2syntax.3 +++ b/doc/pcre2syntax.3 @@ -1,16 +1,21 @@ -.TH PCRE2SYNTAX 3 "12 October 2023" "PCRE2 10.43" +.TH PCRE2SYNTAX 3 "27 November 2024" "PCRE2 10.45-RC1" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY" .rs .sp -The full syntax and semantics of the regular expressions that are supported by -PCRE2 are described in the +The full syntax and semantics of the regular expression patterns that are +supported by PCRE2 are described in the .\" HREF \fBpcre2pattern\fP .\" -documentation. This document contains a quick-reference summary of the syntax. -. +documentation. This document contains a quick-reference summary of the pattern +syntax followed by the syntax of replacement strings in substitution functions. +The full description of the latter is in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. . .SH "QUOTING" .rs @@ -19,7 +24,12 @@ documentation. This document contains a quick-reference summary of the syntax. \eQ...\eE treat enclosed characters as literal .sp Note that white space inside \eQ...\eE is always treated as literal, even if -PCRE2_EXTENDED is set, causing most other white space to be ignored. +PCRE2_EXTENDED is set, causing most other white space to be ignored. Note also +that PCRE2's handling of \eQ...\eE has some differences from Perl's.
See the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation for details. . . .SH "BRACED ITEMS" @@ -54,6 +64,10 @@ sequence causes an error. \exhh character with hex code hh \ex{hh..} character with hex code hh.. .sp +\eN{U+hh..} is synonymous with \ex{hh..} but is not supported in environments +that use EBCDIC code (mainly IBM mainframes). Note that \eN not followed by an +opening curly bracket has a different meaning (see below). +.P If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the following are also recognized: .sp @@ -61,7 +75,7 @@ following are also recognized: \euhhhh character with hex code hhhh \eu{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX .sp -When \ex is not followed by {, from zero to two hexadecimal digits are read, +When \ex is not followed by {, one or two hexadecimal digits are read, but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be recognized as a hexadecimal escape; otherwise it matches a literal "x". Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits @@ -79,9 +93,7 @@ in the \fBpcre2pattern\fP .\" documentation, where details of escape processing in EBCDIC environments are -also given. \eN{U+hh..} is synonymous with \ex{hh..} in PCRE2 but is not -supported in EBCDIC environments. Note that \eN not followed by an opening -curly bracket has a different meaning (see below). +also given. . . .SH "CHARACTER TYPES" @@ -120,8 +132,9 @@ characters, but there are some option settings that can restrict individual sequences to matching only ASCII characters. .P Property descriptions in \ep and \eP are matched caselessly; hyphens, -underscores, and white space are ignored, in accordance with Unicode's "loose -matching" rules. +underscores, and ASCII white space characters are ignored, in accordance with +Unicode's "loose matching" rules. For example, \ep{Bidi_Class=al} is the same +as \ep{ bidi class = AL }. . . 
.SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP" @@ -135,13 +148,13 @@ matching" rules. Cs Surrogate .sp L Letter + Lc Cased letter, the union of Ll, Lu, and Lt + L& Synonym of Lc Ll Lower case letter Lm Modifier letter Lo Other letter Lt Title case letter Lu Upper case letter - Lc Ll, Lu, or Lt - L& Ll, Lu, or Lt .sp M Mark Mc Spacing mark @@ -172,6 +185,9 @@ matching" rules. Zl Line separator Zp Paragraph separator Zs Space separator +.sp +From release 10.45, when caseless matching is set, Ll, Lu, and Lt are all +equivalent to Lc. . . .SH "PCRE2 SPECIAL CATEGORY PROPERTIES FOR \ep and \eP" @@ -240,7 +256,7 @@ The recognized classes are: RLI right-to-left isolate RLO right-to-left override S segment separator - WS which space + WS white space . . .SH "CHARACTER CLASSES" @@ -270,6 +286,43 @@ The recognized classes are: In PCRE2, POSIX character set names recognize only ASCII characters by default, but some of them use Unicode properties if PCRE2_UCP is set. You can use \eQ...\eE inside a character class. +.P +When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes may be +used, allowing nested character classes, combined using set operators. +.sp + [x&&[^y]] UTS#18 extended character class +.sp + x||y set union (OR) + x&&y set intersection (AND) + x--y set difference (AND NOT) + x~~y set symmetric difference (XOR) +.sp +. +. +.SH "PERL EXTENDED CHARACTER CLASSES" +.rs +.sp + (?[...]) Perl extended character class + (?[\ep{Thai} & \ep{Nd}]) operators; whitespace ignored + (?[(x - y) & z]) parentheses for grouping +.sp + (?[ [^3] & \ep{Nd} ]) [...] is a nested ordinary class + (?[ [:alpha:] - [z] ]) POSIX set is allowed outside [...] + (?[ \ed - [3] ]) backslash-escaped set is allowed outside [...] + (?[ !\en & [:ascii:] ]) backslash-escaped character is allowed outside [...] + all other characters or ranges must be enclosed in [...] 
+.sp + x|y, x+y set union (OR) + x&y set intersection (AND) + x-y set difference (AND NOT) + x^y set symmetric difference (XOR) + !x set complement (NOT) +.sp +Inside a Perl extended character class, [...] switches mode to be interpreted +as an ordinary character class. Outside of a nested [...], the only items +permitted are backslash-escapes, POSIX sets, operators, and parentheses. Inside +a nested ordinary class, ^ has its usual meaning (inverts the class when used +as the first character); outside of a nested class, ^ is the XOR operator. . . .SH "QUANTIFIERS" @@ -384,7 +437,7 @@ of the group. (?^) unset imnrsx options .sp (?aP) implies (?aT) as well, though this has no additional effect. However, it -means that (?-aP) is really (?-PT) which disables all ASCII restrictions for +means that (?-aP) also implies (?-aT) and disables all ASCII restrictions for POSIX classes. .P Unsetting x or xx unsets both. Several options may be set at once, and a @@ -394,20 +447,22 @@ only one hyphen. Setting (but no unsetting) is allowed after (?^ for example example (?i:...). .P The following are recognized only at the very start of a pattern or after one -of the newline or \eR options with similar syntax. More than one of them may -appear. For the first three, d is a decimal number. -.sp - (*LIMIT_DEPTH=d) set the backtracking limit to d - (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes - (*LIMIT_MATCH=d) set the match limit to d - (*NOTEMPTY) set PCRE2_NOTEMPTY when matching - (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching - (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) +of the newline or \eR sequences or options with similar syntax. More than one +of them may appear. For the first three, d is a decimal number. 
+.sp + (*LIMIT_DEPTH=d) set the backtracking limit to d + (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes + (*LIMIT_MATCH=d) set the match limit to d + (*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching + (*NOTEMPTY) set PCRE2_NOTEMPTY when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR) - (*NO_JIT) disable JIT optimization - (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) - (*UTF) set appropriate UTF mode for the library in use - (*UCP) set PCRE2_UCP (use Unicode properties for \ed etc) + (*NO_JIT) disable JIT optimization + (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) + (*TURKISH_CASING) set PCRE2_EXTRA_TURKISH_CASING when matching + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE2_UCP (use Unicode properties for \ed etc) .sp Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of the limits set by the caller of \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP, @@ -481,6 +536,23 @@ These assertions are specific to PCRE2 and are not Perl-compatible. (*non_atomic_positive_lookbehind:...) ) . . +.SH "SUBSTRING SCAN ASSERTION" +.rs +This feature is not Perl-compatible. +.sp + (*scan_substring:(grouplist)...) scan captured substring + (*scs:(grouplist)...) scan captured substring +.sp +The comma-separated list may identify groups in any of the following ways: +.sp + n absolute reference + +n relative reference + -n relative reference + name + 'name' name +.sp +. +. .SH "SCRIPT RUNS" .rs .sp @@ -592,6 +664,52 @@ start and the end), and the starting delimiter { matched with the ending delimiter }. To encode the ending delimiter within the string, double it. . . 
+.SH "REPLACEMENT STRINGS" +.rs +.sp +If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for +\fBpcre2_substitute()\fP is not interpreted. Otherwise, by default, the only +special character is the dollar character in one of the following forms: +.sp + $$ insert a dollar character + $n or ${n} insert the contents of group \fIn\fP + $ insert the contents of named group + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string + $*MARK or ${*MARK} insert a control verb name +.sp +For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is set, +there is additional interpretation: +.P +1. Backslash is an escape character, and the forms described in "ESCAPED +CHARACTERS" above are recognized. Also: +.sp + \eQ...\eE can be used to suppress interpretation + \el force the next character to lower case + \eu force the next character to upper case + \eL force subsequent characters to lower case + \eU force subsequent characters to upper case + \eu\eL force next character to upper case, then all lower + \el\eU force next character to lower case, then all upper + \eE end \eL or \eU case forcing + \eb backspace character (note: as in character class in pattern) + \ev vertical tab character (note: not the same as in a pattern) +.sp +2. The Python form \eg<n>, where the angle brackets are part of the syntax and +\fIn\fP is either a group name or a number, is recognized as an alternative way +of inserting the contents of a group, for example \eg<3>. +.P +3. Capture substitution supports the following additional forms: +.sp + ${n:-string} default for unset group + ${n:+string1:string2} values for set/unset group +.sp +The substitution strings themselves are expanded. Backslash can be used to +escape colons and closing curly brackets. +. +. .SH "SEE ALSO" .rs .sp @@ -613,6 +731,6 @@ Cambridge, England.
.rs .sp .nf -Last updated: 12 October 2023 -Copyright (c) 1997-2023 University of Cambridge. +Last updated: 27 November 2024 +Copyright (c) 1997-2024 University of Cambridge. .fi diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index c7df418..1f0c471 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "24 April 2024" "PCRE 10.44" +.TH PCRE2TEST 1 "26 December 2024" "PCRE2 10.45-RC1" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -76,8 +76,8 @@ possible to include binary zeros. .sp When testing the 16-bit or 32-bit libraries, there is a need to be able to generate character code points greater than 255 in the strings that are passed -to the library. For subject lines, backslash escapes can be used. In addition, -when the \fButf\fP modifier (see +to the library. For subject lines and some patterns, backslash escapes can be +used. In addition, when the \fButf\fP modifier (see .\" HTML .\" "Setting compilation options" @@ -97,9 +97,8 @@ UTF-8 (in its original definition) is not capable of encoding values greater than 0x7fffffff, but such values can be handled by the 32-bit library. When testing this library in non-UTF mode with \fButf8_input\fP set, if any character is preceded by the byte 0xff (which is an invalid byte in UTF-8) -0x80000000 is added to the character's value. This is the only way of passing -such code points in a pattern string. For subject strings, using an escape -sequence is preferable. +0x80000000 is added to the character's value. For subject strings, using an +escape sequence is preferable. . . .SH "COMMAND LINE OPTIONS" @@ -145,8 +144,8 @@ functionality is intended for use in scripts such as \fBRunTest\fP. 
The following options output the value and set the exit code as indicated: .sp ebcdic-nl the code for LF (= NL) in an EBCDIC environment: - 0x15 or 0x25 - 0 if used in an ASCII environment + either 0x15 or 0x25 + 0 if used in an ASCII/Unicode environment exit code is always 0 linksize the configured internal link size (2, 3, or 4) exit code is set to the link size @@ -168,6 +167,16 @@ to the same value: pcre2-8 the 8-bit library was built unicode Unicode support is available .sp +Note that the availability of JIT support in the library does not guarantee +that it can actually be used because in some environments it is unable to +allocate executable memory. The option "jitusable" gives more detailed +information. It returns one of the following values: +.sp + 0 JIT is available and usable + 1 JIT is available but cannot allocate executable memory + 2 JIT is not available + 3 Unexpected return from test call to \fBpcre2_jit_compile()\fP +.sp If an unknown option is given, an error message is output; the exit code is 0. .TP 10 \fB-d\fP @@ -483,36 +492,45 @@ space is removed, and the line is scanned for backslash escapes, unless the \fBsubject_literal\fP modifier was set for the pattern. The following provide a means of encoding non-printing characters in a visible way: .sp - \ea alarm (BEL, \ex07) - \eb backspace (\ex08) - \ee escape (\ex27) - \ef form feed (\ex0c) - \en newline (\ex0a) - \er carriage return (\ex0d) - \et tab (\ex09) - \ev vertical tab (\ex0b) - \ennn octal character (up to 3 octal digits); always - a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode - \eo{dd...} octal character (any number of octal digits} - \exhh hexadecimal byte (up to 2 hex digits) - \ex{hh...} hexadecimal character (any number of hex digits) -.sp -The use of \ex{hh...} is not dependent on the use of the \fButf\fP modifier on -the pattern. It is recognized always. There may be any number of hexadecimal -digits inside the braces; invalid values provoke error messages. 
-.P -Note that \exhh specifies one byte rather than one character in UTF-8 mode; -this makes it possible to construct invalid UTF-8 sequences for testing -purposes. On the other hand, \ex{hh} is interpreted as a UTF-8 character in -UTF-8 mode, generating more than one byte if the value is greater than 127. -When testing the 8-bit library not in UTF-8 mode, \ex{hh} generates one byte -for values less than 256, and causes an error for greater values. -.P -In UTF-16 mode, all 4-digit \ex{hhhh} values are accepted. This makes it -possible to construct invalid UTF-16 sequences for testing purposes. -.P -In UTF-32 mode, all 4- to 8-digit \ex{...} values are accepted. This makes it -possible to construct invalid UTF-32 sequences for testing purposes. + \ea alarm (BEL, \ex07) + \eb backspace (\ex08) + \ee escape (\ex1b) + \ef form feed (\ex0c) + \en newline (\ex0a) + \eN{U+hh...} Unicode character (any number of hex digits) + \er carriage return (\ex0d) + \et tab (\ex09) + \ev vertical tab (\ex0b) + \eddd octal number (up to 3 octal digits); represents a single + code point unless larger than 255 with the 8-bit library + \eo{dd...} octal number (any number of octal digits) representing a + character in UTF mode or a code point + \exhh hexadecimal byte (up to 2 hex digits) + \ex{hh...} hexadecimal number (up to 8 hex digits) representing a + character in UTF mode or a code point +.sp +Invoking \eN{U+hh...} or \ex{hh...} doesn't require the use of the \fButf\fP +modifier on the pattern. It is always recognized. There may be any number of +hexadecimal digits inside the braces; invalid values provoke error messages +but when using \eN{U+hh...} with some invalid Unicode characters they will +be accepted with a warning instead. +.P +Note that even in UTF-8 mode, \exhh (and depending on how large, \eddd) +describe one byte rather than one character; this makes it possible to +construct invalid UTF-8 sequences for testing purposes. 
On the other hand, +\ex{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only generating +more than one byte if the value is greater than 127. To avoid the ambiguity +it is preferred to use \eN{U+hh...} when describing characters. When testing +the 8-bit library not in UTF-8 mode, \ex{hh} generates one byte for values +that could fit on it, and causes an error for greater values. +.P +When testing the 16-bit library, not in UTF-16 mode, all 4-digit \ex{hhhh} +values are accepted. This makes it possible to construct invalid UTF-16 +sequences for testing purposes. +.P +When testing the 32-bit library, not in UTF-32 mode, all 4 to 8-digit \ex{...} +values are accepted. This makes it possible to construct invalid UTF-32 +sequences for testing purposes. .P There is a special backslash sequence that specifies replication of one or more characters: @@ -580,6 +598,7 @@ for a description of the effects of these options. allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES alt_bsux set PCRE2_ALT_BSUX alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_extended_class set PCRE2_ALT_EXTENDED_CLASS alt_verbnames set PCRE2_ALT_VERBNAMES anchored set PCRE2_ANCHORED /a ascii_all set all ASCII options @@ -608,13 +627,17 @@ for a description of the effects of these options. match_word set PCRE2_EXTRA_MATCH_WORD /m multiline set PCRE2_MULTILINE never_backslash_c set PCRE2_NEVER_BACKSLASH_C + never_callout set PCRE2_EXTRA_NEVER_CALLOUT never_ucp set PCRE2_NEVER_UCP never_utf set PCRE2_NEVER_UTF /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE no_auto_possess set PCRE2_NO_AUTO_POSSESS + no_bs0 set PCRE2_EXTRA_NO_BS0 no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR no_start_optimize set PCRE2_NO_START_OPTIMIZE no_utf_check set PCRE2_NO_UTF_CHECK + python_octal set PCRE2_EXTRA_PYTHON_OCTAL + turkish_casing set PCRE2_EXTRA_TURKISH_CASING ucp set PCRE2_UCP ungreedy set PCRE2_UNGREEDY use_offset_limit set PCRE2_USE_OFFSET_LIMIT @@ -626,6 +649,24 @@ notation. 
Otherwise, those less than 0x100 are output in hex without the curly brackets. Setting \fButf\fP in 16-bit or 32-bit mode also causes pattern and subject strings to be translated to UTF-16 or UTF-32, respectively, before being passed to library functions. +.sp +The following modifiers enable or disable performance optimizations by +calling \fBpcre2_set_optimize()\fP before invoking the regex compiler. +.sp + optimization_full enable all optional optimizations + optimization_none disable all optional optimizations + auto_possess auto-possessify variable quantifiers + auto_possess_off don't auto-possessify variable quantifiers + dotstar_anchor anchor patterns starting with .* + dotstar_anchor_off don't anchor patterns starting with .* + start_optimize enable pre-scan of subject string + start_optimize_off disable pre-scan of subject string +.sp +See the +.\" HREF +\fBpcre2_set_optimize\fP +.\" +documentation for details on these optimizations. . . .\" HTML @@ -636,14 +677,15 @@ The following modifiers affect the compilation process or request information about the pattern. There are single-letter abbreviations for some that are heavily used in the test files. .sp - bsr=[anycrlf|unicode] specify \eR handling /B bincode show binary code without lengths + bsr=[anycrlf|unicode] specify \eR handling callout_info show callout information convert= request foreign pattern conversion convert_glob_escape=c set glob escape character convert_glob_separator=c set glob separator character convert_length set convert buffer length debug same as info,fullbincode + expand expand repetition syntax in pattern framesize show matching frame size fullbincode show binary code with lengths /I info show info about compiled pattern @@ -665,6 +707,7 @@ heavily used in the test files. 
posix_nosub use the POSIX API with REG_NOSUB push push compiled pattern onto the stack pushcopy push a copy onto the stack + pushtablescopy push a copy with tables onto the stack stackguard= test the stackguard feature subject_literal treat all subject lines as literal tables=[0|1|2|3] select internal tables @@ -1086,6 +1129,7 @@ process. replace= specify a replacement string startchar show starting character when relevant substitute_callout use substitution callouts + substitute_case_callout use substitution case callouts substitute_extended use PCRE2_SUBSTITUTE_EXTENDED substitute_literal use PCRE2_SUBSTITUTE_LITERAL substitute_matched use PCRE2_SUBSTITUTE_MATCHED @@ -1181,11 +1225,12 @@ command are of two types. The following modifiers set options for \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. See .\" HREF -\fBpcreapi\fP +\fBpcre2api\fP .\" for a description of their effects. .sp anchored set PCRE2_ANCHORED + copy_matched_subject set PCRE2_COPY_MATCHED_SUBJECT endanchored set PCRE2_ENDANCHORED dfa_restart set PCRE2_DFA_RESTART dfa_shortest set PCRE2_DFA_SHORTEST @@ -1236,8 +1281,8 @@ pattern, but can be overridden by modifiers on the subject. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allvector show the entire ovector allusedtext show all consulted text (non-JIT only) + allvector show the entire ovector altglobal alternative global matching callout_capture show captures at callout time callout_data= set a value to pass via callouts @@ -1271,7 +1316,8 @@ pattern, but can be overridden by modifiers on the subject. 
startchar show startchar when relevant startoffset= same as offset= substitute_callout use substitution callouts - substitute_extedded use PCRE2_SUBSTITUTE_EXTENDED + substitute_case_callout use substitution case callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED substitute_literal use PCRE2_SUBSTITUTE_LITERAL substitute_matched use PCRE2_SUBSTITUTE_MATCHED substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH @@ -1556,6 +1602,20 @@ If both are set for the same number, stop takes precedence. Only a single skip or stop is supported, which is sufficient for testing that the feature works. . . +.SS "Testing substitute case callouts" +.rs +.sp +If the \fBsubstitute_case_callout\fP modifier is set, a substitution +case callout function is set up. The callout function is called for each +substituted chunk which is to be case-transformed. +.P +The callout function passed is a fixed function with implementation for certain +behaviours: inputs which shrink when case-transformed; inputs which grow; inputs +with distinct upper/lower/titlecase forms. The characters which are not +special-cased for testing purposes are left unmodified, as if they are caseless +characters. +. +. .SS "Setting the JIT stack size" .rs .sp @@ -2181,6 +2241,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 24 April 2024 +Last updated: 26 December 2024 Copyright (c) 1997-2024 University of Cambridge. .fi diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt index ddb491d..bd23153 100644 --- a/doc/pcre2test.txt +++ b/doc/pcre2test.txt @@ -1,4 +1,3 @@ - PCRE2TEST(1) General Commands Manual PCRE2TEST(1) @@ -72,26 +71,25 @@ INPUT ENCODING When testing the 16-bit or 32-bit libraries, there is a need to be able to generate character code points greater than 255 in the strings that - are passed to the library. For subject lines, backslash escapes can be - used. 
In addition, when the utf modifier (see "Setting compilation op- - tions" below) is set, the pattern and any following subject lines are - interpreted as UTF-8 strings and translated to UTF-16 or UTF-32 as ap- - propriate. - - For non-UTF testing of wide characters, the utf8_input modifier can be - used. This is mutually exclusive with utf, and is allowed only in - 16-bit or 32-bit mode. It causes the pattern and following subject - lines to be treated as UTF-8 according to the original definition (RFC + are passed to the library. For subject lines and some patterns, back- + slash escapes can be used. In addition, when the utf modifier (see + "Setting compilation options" below) is set, the pattern and any fol- + lowing subject lines are interpreted as UTF-8 strings and translated to + UTF-16 or UTF-32 as appropriate. + + For non-UTF testing of wide characters, the utf8_input modifier can be + used. This is mutually exclusive with utf, and is allowed only in + 16-bit or 32-bit mode. It causes the pattern and following subject + lines to be treated as UTF-8 according to the original definition (RFC 2279), which allows for character values up to 0x7fffffff. Each charac- - ter is placed in one 16-bit or 32-bit code unit (in the 16-bit case, + ter is placed in one 16-bit or 32-bit code unit (in the 16-bit case, values greater than 0xffff cause an error to occur). - UTF-8 (in its original definition) is not capable of encoding values - greater than 0x7fffffff, but such values can be handled by the 32-bit + UTF-8 (in its original definition) is not capable of encoding values + greater than 0x7fffffff, but such values can be handled by the 32-bit library. When testing this library in non-UTF mode with utf8_input set, if any character is preceded by the byte 0xff (which is an invalid byte - in UTF-8) 0x80000000 is added to the character's value. This is the - only way of passing such code points in a pattern string. 
For subject + in UTF-8) 0x80000000 is added to the character's value. For subject strings, using an escape sequence is preferable. @@ -135,8 +133,8 @@ COMMAND LINE OPTIONS the exit code as indicated: ebcdic-nl the code for LF (= NL) in an EBCDIC environment: - 0x15 or 0x25 - 0 if used in an ASCII environment + either 0x15 or 0x25 + 0 if used in an ASCII/Unicode environment exit code is always 0 linksize the configured internal link size (2, 3, or 4) exit code is set to the link size @@ -158,56 +156,67 @@ COMMAND LINE OPTIONS pcre2-8 the 8-bit library was built unicode Unicode support is available - If an unknown option is given, an error message is output; + Note that the availability of JIT support in the library does + not guarantee that it can actually be used because in some + environments it is unable to allocate executable memory. The + option "jitusable" gives more detailed information. It re- + turns one of the following values: + + 0 JIT is available and usable + 1 JIT is available but cannot allocate executable memory + 2 JIT is not available + 3 Unexpected return from test call to pcre2_jit_compile() + + If an unknown option is given, an error message is output; the exit code is 0. - -d Behave as if each pattern has the debug modifier; the inter- + -d Behave as if each pattern has the debug modifier; the inter- nal form and information about the compiled pattern is output after compilation; -d is equivalent to -b -i. -dfa Behave as if each subject line has the dfa modifier; matching - is done using the pcre2_dfa_match() function instead of the + is done using the pcre2_dfa_match() function instead of the default pcre2_match(). -error number[,number,...] - Call pcre2_get_error_message() for each of the error numbers - in the comma-separated list, display the resulting messages - on the standard output, then exit with zero exit code. The - numbers may be positive or negative. 
This is a convenience + Call pcre2_get_error_message() for each of the error numbers + in the comma-separated list, display the resulting messages + on the standard output, then exit with zero exit code. The + numbers may be positive or negative. This is a convenience facility for PCRE2 maintainers. -help Output a brief summary these options and then exit. - -i Behave as if each pattern has the info modifier; information + -i Behave as if each pattern has the info modifier; information about the compiled pattern is given after compilation. - -jit Behave as if each pattern line has the jit modifier; after - successful compilation, each pattern is passed to the just- + -jit Behave as if each pattern line has the jit modifier; after + successful compilation, each pattern is passed to the just- in-time compiler, if available. - -jitfast Behave as if each pattern line has the jitfast modifier; af- - ter successful compilation, each pattern is passed to the + -jitfast Behave as if each pattern line has the jitfast modifier; af- + ter successful compilation, each pattern is passed to the just-in-time compiler, if available, and each subject line is passed directly to the JIT matcher via its "fast path". -jitverify - Behave as if each pattern line has the jitverify modifier; - after successful compilation, each pattern is passed to the - just-in-time compiler, if available, and the use of JIT for + Behave as if each pattern line has the jitverify modifier; + after successful compilation, each pattern is passed to the + just-in-time compiler, if available, and the use of JIT for matching is verified. -LM List modifiers: write a list of available pattern and subject - modifiers to the standard output, then exit with zero exit - code. All other options are ignored. If both -C and any -Lx + modifiers to the standard output, then exit with zero exit + code. All other options are ignored. If both -C and any -Lx options are present, whichever is first is recognized. 
- -LP List properties: write a list of recognized Unicode proper- - ties to the standard output, then exit with zero exit code. + -LP List properties: write a list of recognized Unicode proper- + ties to the standard output, then exit with zero exit code. All other options are ignored. If both -C and any -Lx options are present, whichever is first is recognized. -LS List scripts: write a list of recognized Unicode script names - to the standard output, then exit with zero exit code. All + to the standard output, then exit with zero exit code. All other options are ignored. If both -C and any -Lx options are present, whichever is first is recognized. @@ -217,25 +226,25 @@ COMMAND LINE OPTIONS -q Do not output the version number of pcre2test at the start of execution. - -S size On Unix-like systems, set the size of the run-time stack to + -S size On Unix-like systems, set the size of the run-time stack to size mebibytes (units of 1024*1024 bytes). -subject modifier-list Behave as if each subject line contains the given modifiers. - -t Run each compile and match many times with a timer, and out- - put the resulting times per compile or match. When JIT is - used, separate times are given for the initial compile and - the JIT compile. You can control the number of iterations - that are used for timing by following -t with a number (as a - separate item on the command line). For example, "-t 1000" + -t Run each compile and match many times with a timer, and out- + put the resulting times per compile or match. When JIT is + used, separate times are given for the initial compile and + the JIT compile. You can control the number of iterations + that are used for timing by following -t with a number (as a + separate item on the command line). For example, "-t 1000" iterates 1000 times. The default is to iterate 500,000 times. -tm This is like -t except that it times only the matching phase, not the compile phase. 
- -T -TM These behave like -t and -tm, but in addition, at the end of - a run, the total times for all compiles and matches are out- + -T -TM These behave like -t and -tm, but in addition, at the end of + a run, the total times for all compiles and matches are out- put. -version Output the PCRE2 version number and then exit. @@ -243,153 +252,153 @@ COMMAND LINE OPTIONS DESCRIPTION - If pcre2test is given two filename arguments, it reads from the first + If pcre2test is given two filename arguments, it reads from the first and writes to the second. If the first name is "-", input is taken from - the standard input. If pcre2test is given only one argument, it reads + the standard input. If pcre2test is given only one argument, it reads from that file and writes to stdout. Otherwise, it reads from stdin and writes to stdout. - When pcre2test is built, a configuration option can specify that it - should be linked with the libreadline or libedit library. When this is - done, if the input is from a terminal, it is read using the readline() + When pcre2test is built, a configuration option can specify that it + should be linked with the libreadline or libedit library. When this is + done, if the input is from a terminal, it is read using the readline() function. This provides line-editing and history facilities. The output from the -help option states whether or not readline() will be used. - The program handles any number of tests, each of which consists of a - set of input lines. Each set starts with a regular expression pattern, + The program handles any number of tests, each of which consists of a + set of input lines. Each set starts with a regular expression pattern, followed by any number of subject lines to be matched against that pat- tern. In between sets of test data, command lines that begin with # may appear. 
This file format, with some restrictions, can also be processed - by the perltest.sh script that is distributed with PCRE2 as a means of + by the perltest.sh script that is distributed with PCRE2 as a means of checking that the behaviour of PCRE2 and Perl is the same. For a speci- - fication of perltest.sh, see the comments near its beginning. See also + fication of perltest.sh, see the comments near its beginning. See also the #perltest command below. When the input is a terminal, pcre2test prompts for each line of input, - using "re>" to prompt for regular expression patterns, and "data>" to - prompt for subject lines. Command lines starting with # can be entered + using "re>" to prompt for regular expression patterns, and "data>" to + prompt for subject lines. Command lines starting with # can be entered only in response to the "re>" prompt. - Each subject line is matched separately and independently. If you want + Each subject line is matched separately and independently. If you want to do multi-line matches, you have to use the \n escape sequence (or \r - or \r\n, etc., depending on the newline setting) in a single line of - input to encode the newline sequences. There is no limit on the length - of subject lines; the input buffer is automatically extended if it is - too small. There are replication features that makes it possible to - generate long repetitive pattern or subject lines without having to + or \r\n, etc., depending on the newline setting) in a single line of + input to encode the newline sequences. There is no limit on the length + of subject lines; the input buffer is automatically extended if it is + too small. There are replication features that makes it possible to + generate long repetitive pattern or subject lines without having to supply them explicitly. 
- An empty line or the end of the file signals the end of the subject - lines for a test, at which point a new pattern or command line is ex- + An empty line or the end of the file signals the end of the subject + lines for a test, at which point a new pattern or command line is ex- pected if there is still input to be read. COMMAND LINES - In between sets of test data, a line that begins with # is interpreted + In between sets of test data, a line that begins with # is interpreted as a command line. If the first character is followed by white space or - an exclamation mark, the line is treated as a comment, and ignored. + an exclamation mark, the line is treated as a comment, and ignored. Otherwise, the following commands are recognized: #forbid_utf - Subsequent patterns automatically have the PCRE2_NEVER_UTF and - PCRE2_NEVER_UCP options set, which locks out the use of the PCRE2_UTF - and PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start of - patterns. This command also forces an error if a subsequent pattern - contains any occurrences of \P, \p, or \X, which are still supported - when PCRE2_UTF is not set, but which require Unicode property support + Subsequent patterns automatically have the PCRE2_NEVER_UTF and + PCRE2_NEVER_UCP options set, which locks out the use of the PCRE2_UTF + and PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start of + patterns. This command also forces an error if a subsequent pattern + contains any occurrences of \P, \p, or \X, which are still supported + when PCRE2_UTF is not set, but which require Unicode property support to be included in the library. - This is a trigger guard that is used in test files to ensure that UTF - or Unicode property tests are not accidentally added to files that are - used when Unicode support is not included in the library. 
Setting - PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as a default can also be obtained - by the use of #pattern; the difference is that #forbid_utf cannot be - unset, and the automatic options are not displayed in pattern informa- + This is a trigger guard that is used in test files to ensure that UTF + or Unicode property tests are not accidentally added to files that are + used when Unicode support is not included in the library. Setting + PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as a default can also be obtained + by the use of #pattern; the difference is that #forbid_utf cannot be + unset, and the automatic options are not displayed in pattern informa- tion, to avoid cluttering up test output. #load This command is used to load a set of precompiled patterns from a file, - as described in the section entitled "Saving and restoring compiled + as described in the section entitled "Saving and restoring compiled patterns" below. #loadtables - This command is used to load a set of binary character tables that can - be accessed by the tables=3 qualifier. Such tables can be created by + This command is used to load a set of binary character tables that can + be accessed by the tables=3 qualifier. Such tables can be created by the pcre2_dftables program with the -b option. #newline_default [] - When PCRE2 is built, a default newline convention can be specified. - This determines which characters and/or character pairs are recognized + When PCRE2 is built, a default newline convention can be specified. + This determines which characters and/or character pairs are recognized as indicating a newline in a pattern or subject string. The default can - be overridden when a pattern is compiled. The standard test files con- - tain tests of various newline conventions, but the majority of the - tests expect a single linefeed to be recognized as a newline by de- - fault. Without special action the tests would fail when PCRE2 is com- + be overridden when a pattern is compiled. 
The standard test files con- + tain tests of various newline conventions, but the majority of the + tests expect a single linefeed to be recognized as a newline by de- + fault. Without special action the tests would fail when PCRE2 is com- piled with either CR or CRLF as the default newline. The #newline_default command specifies a list of newline types that are - acceptable as the default. The types must be one of CR, LF, CRLF, ANY- + acceptable as the default. The types must be one of CR, LF, CRLF, ANY- CRLF, ANY, or NUL (in upper or lower case), for example: #newline_default LF Any anyCRLF If the default newline is in the list, this command has no effect. Oth- - erwise, except when testing the POSIX API, a newline modifier that + erwise, except when testing the POSIX API, a newline modifier that specifies the first newline convention in the list (LF in the above ex- - ample) is added to any pattern that does not already have a newline + ample) is added to any pattern that does not already have a newline modifier. If the newline list is empty, the feature is turned off. This command is present in a number of the standard test input files. - When the POSIX API is being tested there is no way to override the de- + When the POSIX API is being tested there is no way to override the de- fault newline convention, though it is possible to set the newline con- - vention from within the pattern. A warning is given if the posix or - posix_nosub modifier is used when #newline_default would set a default + vention from within the pattern. A warning is given if the posix or + posix_nosub modifier is used when #newline_default would set a default for the non-POSIX API. #pattern - This command sets a default modifier list that applies to all subse- + This command sets a default modifier list that applies to all subse- quent patterns. Modifiers on a pattern can change these settings. 
#perltest - This line is used in test files that can also be processed by perl- - test.sh to confirm that Perl gives the same results as PCRE2. Subse- - quent tests are checked for the use of pcre2test features that are in- + This line is used in test files that can also be processed by perl- + test.sh to confirm that Perl gives the same results as PCRE2. Subse- + quent tests are checked for the use of pcre2test features that are in- compatible with the perltest.sh script. - Patterns must use '/' as their delimiter, and only certain modifiers - are supported. Comment lines, #pattern commands, and #subject commands - that set or unset "mark" are recognized and acted on. The #perltest, - #forbid_utf, and #newline_default commands, which are needed in the + Patterns must use '/' as their delimiter, and only certain modifiers + are supported. Comment lines, #pattern commands, and #subject commands + that set or unset "mark" are recognized and acted on. The #perltest, + #forbid_utf, and #newline_default commands, which are needed in the relevant pcre2test files, are silently ignored. All other command lines - are ignored, but give a warning message. The #perltest command helps - detect tests that are accidentally put in the wrong file or use the - wrong delimiter. For more details of the perltest.sh script see the + are ignored, but give a warning message. The #perltest command helps + detect tests that are accidentally put in the wrong file or use the + wrong delimiter. For more details of the perltest.sh script see the comments it contains. #pop [] #popcopy [] - These commands are used to manipulate the stack of compiled patterns, - as described in the section entitled "Saving and restoring compiled + These commands are used to manipulate the stack of compiled patterns, + as described in the section entitled "Saving and restoring compiled patterns" below. 
#save - This command is used to save a set of compiled patterns to a file, as - described in the section entitled "Saving and restoring compiled pat- + This command is used to save a set of compiled patterns to a file, as + described in the section entitled "Saving and restoring compiled pat- terns" below. #subject - This command sets a default modifier list that applies to all subse- - quent subject lines. Modifiers on a subject line can change these set- + This command sets a default modifier list that applies to all subse- + quent subject lines. Modifiers on a subject line can change these set- tings. @@ -397,47 +406,47 @@ MODIFIER SYNTAX Modifier lists are used with both pattern and subject lines. Items in a list are separated by commas followed by optional white space. Trailing - whitespace in a modifier list is ignored. Some modifiers may be given - for both patterns and subject lines, whereas others are valid only for - one or the other. Each modifier has a long name, for example "an- - chored", and some of them must be followed by an equals sign and a - value, for example, "offset=12". Values cannot contain comma charac- - ters, but may contain spaces. Modifiers that do not take values may be + whitespace in a modifier list is ignored. Some modifiers may be given + for both patterns and subject lines, whereas others are valid only for + one or the other. Each modifier has a long name, for example "an- + chored", and some of them must be followed by an equals sign and a + value, for example, "offset=12". Values cannot contain comma charac- + ters, but may contain spaces. Modifiers that do not take values may be preceded by a minus sign to turn off a previous setting. A few of the more common modifiers can also be specified as single let- - ters, for example "i" for "caseless". In documentation, following the + ters, for example "i" for "caseless". 
In documentation, following the Perl convention, these are written with a slash ("the /i modifier") for - clarity. Abbreviated modifiers must all be concatenated in the first - item of a modifier list. If the first item is not recognized as a long - modifier name, it is interpreted as a sequence of these abbreviations. + clarity. Abbreviated modifiers must all be concatenated in the first + item of a modifier list. If the first item is not recognized as a long + modifier name, it is interpreted as a sequence of these abbreviations. For example: /abc/ig,newline=cr,jit=3 - This is a pattern line whose modifier list starts with two one-letter - modifiers (/i and /g). The lower-case abbreviated modifiers are the + This is a pattern line whose modifier list starts with two one-letter + modifiers (/i and /g). The lower-case abbreviated modifiers are the same as used in Perl. PATTERN SYNTAX - A pattern line must start with one of the following characters (common + A pattern line must start with one of the following characters (common symbols, excluding pattern meta-characters): / ! " ' ` - = _ : ; , % & @ ~ - This is interpreted as the pattern's delimiter. A regular expression - may be continued over several input lines, in which case the newline + This is interpreted as the pattern's delimiter. A regular expression + may be continued over several input lines, in which case the newline characters are included within it. It is possible to include the delim- - iter as a literal within the pattern by escaping it with a backslash, + iter as a literal within the pattern by escaping it with a backslash, for example /abc\/def/ - If you do this, the escape and the delimiter form part of the pattern, + If you do this, the escape and the delimiter form part of the pattern, but since the delimiters are all non-alphanumeric, the inclusion of the - backslash does not affect the pattern's interpretation. Note, however, + backslash does not affect the pattern's interpretation. 
Note, however, that this trick does not work within \Q...\E literal bracketing because the backslash will itself be interpreted as a literal. If the terminat- ing delimiter is immediately followed by a backslash, for example, @@ -445,13 +454,13 @@ PATTERN SYNTAX /abc/\ a backslash is added to the end of the pattern. This is done to provide - a way of testing the error condition that arises if a pattern finishes + a way of testing the error condition that arises if a pattern finishes with a backslash, because /abc\/ - is interpreted as the first line of a pattern that starts with "abc/", - causing pcre2test to read the next line as a continuation of the regu- + is interpreted as the first line of a pattern that starts with "abc/", + causing pcre2test to read the next line as a continuation of the regu- lar expression. A pattern can be followed by a modifier list (details below). @@ -460,44 +469,52 @@ PATTERN SYNTAX SUBJECT LINE SYNTAX Before each subject line is passed to pcre2_match(), pcre2_dfa_match(), - or pcre2_jit_match(), leading and trailing white space is removed, and - the line is scanned for backslash escapes, unless the subject_literal - modifier was set for the pattern. The following provide a means of en- + or pcre2_jit_match(), leading and trailing white space is removed, and + the line is scanned for backslash escapes, unless the subject_literal + modifier was set for the pattern. 
The following provide a means of en- coding non-printing characters in a visible way: - \a alarm (BEL, \x07) - \b backspace (\x08) - \e escape (\x27) - \f form feed (\x0c) - \n newline (\x0a) - \r carriage return (\x0d) - \t tab (\x09) - \v vertical tab (\x0b) - \nnn octal character (up to 3 octal digits); always - a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode - \o{dd...} octal character (any number of octal digits} - \xhh hexadecimal byte (up to 2 hex digits) - \x{hh...} hexadecimal character (any number of hex digits) - - The use of \x{hh...} is not dependent on the use of the utf modifier on - the pattern. It is recognized always. There may be any number of hexa- - decimal digits inside the braces; invalid values provoke error mes- - sages. - - Note that \xhh specifies one byte rather than one character in UTF-8 - mode; this makes it possible to construct invalid UTF-8 sequences for - testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8 - character in UTF-8 mode, generating more than one byte if the value is - greater than 127. When testing the 8-bit library not in UTF-8 mode, - \x{hh} generates one byte for values less than 256, and causes an error + \a alarm (BEL, \x07) + \b backspace (\x08) + \e escape (\x1b) + \f form feed (\x0c) + \n newline (\x0a) + \N{U+hh...} Unicode character (any number of hex digits) + \r carriage return (\x0d) + \t tab (\x09) + \v vertical tab (\x0b) + \ddd octal number (up to 3 octal digits); represents a single + code point unless larger than 255 with the 8-bit li- + brary + \o{dd...} octal number (any number of octal digits) representing a + character in UTF mode or a code point + \xhh hexadecimal byte (up to 2 hex digits) + \x{hh...} hexadecimal number (up to 8 hex digits) representing a + character in UTF mode or a code point + + Invoking \N{U+hh...} or \x{hh...} doesn't require the use of the utf + modifier on the pattern. It is always recognized.
There may be any num- + ber of hexadecimal digits inside the braces; invalid values provoke er- + ror messages but when using \N{U+hh...} with some invalid Unicode char- + acters they will be accepted with a warning instead. + + Note that even in UTF-8 mode, \xhh (and depending on how large, \ddd) + describe one byte rather than one character; this makes it possible to + construct invalid UTF-8 sequences for testing purposes. On the other + hand, \x{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only + generating more than one byte if the value is greater than 127. To + avoid the ambiguity it is preferred to use \N{U+hh...} when describing + characters. When testing the 8-bit library not in UTF-8 mode, \x{hh} + generates one byte for values that fit in one byte, and causes an error for greater values. - In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it - possible to construct invalid UTF-16 sequences for testing purposes. + When testing the 16-bit library, not in UTF-16 mode, all 4-digit + \x{hhhh} values are accepted. This makes it possible to construct in- + valid UTF-16 sequences for testing purposes. - In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This - makes it possible to construct invalid UTF-32 sequences for testing - purposes. + When testing the 32-bit library, not in UTF-32 mode, all 4- to 8-digit + \x{...} values are accepted. This makes it possible to construct in- + valid UTF-32 sequences for testing purposes.
There is a special backslash sequence that specifies replication of one or more characters: @@ -561,6 +578,7 @@ PATTERN MODIFIERS allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES alt_bsux set PCRE2_ALT_BSUX alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_extended_class set PCRE2_ALT_EXTENDED_CLASS alt_verbnames set PCRE2_ALT_VERBNAMES anchored set PCRE2_ANCHORED /a ascii_all set all ASCII options @@ -589,13 +607,17 @@ PATTERN MODIFIERS match_word set PCRE2_EXTRA_MATCH_WORD /m multiline set PCRE2_MULTILINE never_backslash_c set PCRE2_NEVER_BACKSLASH_C + never_callout set PCRE2_EXTRA_NEVER_CALLOUT never_ucp set PCRE2_NEVER_UCP never_utf set PCRE2_NEVER_UTF /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE no_auto_possess set PCRE2_NO_AUTO_POSSESS + no_bs0 set PCRE2_EXTRA_NO_BS0 no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR no_start_optimize set PCRE2_NO_START_OPTIMIZE no_utf_check set PCRE2_NO_UTF_CHECK + python_octal set PCRE2_EXTRA_PYTHON_OCTAL + turkish_casing set PCRE2_EXTRA_TURKISH_CASING ucp set PCRE2_UCP ungreedy set PCRE2_UNGREEDY use_offset_limit set PCRE2_USE_OFFSET_LIMIT @@ -608,20 +630,36 @@ PATTERN MODIFIERS causes pattern and subject strings to be translated to UTF-16 or UTF-32, respectively, before being passed to library functions. + The following modifiers enable or disable performance optimizations by + calling pcre2_set_optimize() before invoking the regex compiler. + + optimization_full enable all optional optimizations + optimization_none disable all optional optimizations + auto_possess auto-possessify variable quantifiers + auto_possess_off don't auto-possessify variable quantifiers + dotstar_anchor anchor patterns starting with .* + dotstar_anchor_off don't anchor patterns starting with .* + start_optimize enable pre-scan of subject string + start_optimize_off disable pre-scan of subject string + + See the pcre2_set_optimize documentation for details on these optimiza- + tions. 
+ Setting compilation controls - The following modifiers affect the compilation process or request in- - formation about the pattern. There are single-letter abbreviations for + The following modifiers affect the compilation process or request in- + formation about the pattern. There are single-letter abbreviations for some that are heavily used in the test files. - bsr=[anycrlf|unicode] specify \R handling /B bincode show binary code without lengths + bsr=[anycrlf|unicode] specify \R handling callout_info show callout information convert= request foreign pattern conversion convert_glob_escape=c set glob escape character convert_glob_separator=c set glob separator character convert_length set convert buffer length debug same as info,fullbincode + expand expand repetition syntax in pattern framesize show matching frame size fullbincode show binary code with lengths /I info show info about compiled pattern @@ -643,6 +681,7 @@ PATTERN MODIFIERS posix_nosub use the POSIX API with REG_NOSUB push push compiled pattern onto the stack pushcopy push a copy onto the stack + pushtablescopy push a copy with tables onto the stack stackguard= test the stackguard feature subject_literal treat all subject lines as literal tables=[0|1|2|3] select internal tables @@ -653,35 +692,35 @@ PATTERN MODIFIERS Newline and \R handling - The bsr modifier specifies what \R in a pattern should match. If it is - set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to - "unicode", \R matches any Unicode newline sequence. The default can be + The bsr modifier specifies what \R in a pattern should match. If it is + set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to + "unicode", \R matches any Unicode newline sequence. The default can be specified when PCRE2 is built; if it is not, the default is set to Uni- code. 
- The newline modifier specifies which characters are to be interpreted + The newline modifier specifies which characters are to be interpreted as newlines, both in the pattern and in subject lines. The type must be one of CR, LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case). Information about a pattern - The debug modifier is a shorthand for info,fullbincode, requesting all + The debug modifier is a shorthand for info,fullbincode, requesting all available information. The bincode modifier causes a representation of the compiled code to be - output after compilation. This information does not contain length and + output after compilation. This information does not contain length and offset values, which ensures that the same output is generated for dif- - ferent internal link sizes and different code unit widths. By using - bincode, the same regression tests can be used in different environ- + ferent internal link sizes and different code unit widths. By using + bincode, the same regression tests can be used in different environ- ments. - The fullbincode modifier, by contrast, does include length and offset - values. This is used in a few special tests that run only for specific + The fullbincode modifier, by contrast, does include length and offset + values. This is used in a few special tests that run only for specific code unit widths and link sizes, and is also useful for one-off tests. - The info modifier requests information about the compiled pattern - (whether it is anchored, has a fixed first character, and so on). The - information is obtained from the pcre2_pattern_info() function. Here + The info modifier requests information about the compiled pattern + (whether it is anchored, has a fixed first character, and so on). The + information is obtained from the pcre2_pattern_info() function. 
Here are some typical examples: re> /(?i)(^a|^b)/m,info @@ -699,136 +738,136 @@ PATTERN MODIFIERS Last code unit = 'c' (caseless) Subject length lower bound = 3 - "Compile options" are those specified by modifiers; "overall options" - have added options that are taken or deduced from the pattern. If both - sets of options are the same, just a single "options" line is output; - if there are no options, the line is omitted. "First code unit" is - where any match must start; if there is more than one they are listed - as "starting code units". "Last code unit" is the last literal code - unit that must be present in any match. This is not necessarily the - last character. These lines are omitted if no starting or ending code - units are recorded. The subject length line is omitted when - no_start_optimize is set because the minimum length is not calculated + "Compile options" are those specified by modifiers; "overall options" + have added options that are taken or deduced from the pattern. If both + sets of options are the same, just a single "options" line is output; + if there are no options, the line is omitted. "First code unit" is + where any match must start; if there is more than one they are listed + as "starting code units". "Last code unit" is the last literal code + unit that must be present in any match. This is not necessarily the + last character. These lines are omitted if no starting or ending code + units are recorded. The subject length line is omitted when + no_start_optimize is set because the minimum length is not calculated when it can never be used. - The framesize modifier shows the size, in bytes, of each storage frame - used by pcre2_match() for handling backtracking. The size depends on - the number of capturing parentheses in the pattern. 
A vector of these - frames is used at matching time; its overall size is shown when the + The framesize modifier shows the size, in bytes, of each storage frame + used by pcre2_match() for handling backtracking. The size depends on + the number of capturing parentheses in the pattern. A vector of these + frames is used at matching time; its overall size is shown when the heaframes_size subject modifier is set. - The callout_info modifier requests information about all the callouts + The callout_info modifier requests information about all the callouts in the pattern. A list of them is output at the end of any other infor- mation that is requested. For each callout, either its number or string is given, followed by the item that follows it in the pattern. Passing a NULL context - Normally, pcre2test passes a context block to pcre2_compile(). If the - null_context modifier is set, however, NULL is passed. This is for - testing that pcre2_compile() behaves correctly in this case (it uses + Normally, pcre2test passes a context block to pcre2_compile(). If the + null_context modifier is set, however, NULL is passed. This is for + testing that pcre2_compile() behaves correctly in this case (it uses default values). Passing a NULL pattern - The null_pattern modifier is for testing the behaviour of pcre2_com- - pile() when the pattern argument is NULL. The length value passed is + The null_pattern modifier is for testing the behaviour of pcre2_com- + pile() when the pattern argument is NULL. The length value passed is the default PCRE2_ZERO_TERMINATED unless use_length is set. Any length other than zero causes an error. Specifying pattern characters in hexadecimal - The hex modifier specifies that the characters of the pattern, except - for substrings enclosed in single or double quotes, are to be inter- - preted as pairs of hexadecimal digits. 
This feature is provided as a + The hex modifier specifies that the characters of the pattern, except + for substrings enclosed in single or double quotes, are to be inter- + preted as pairs of hexadecimal digits. This feature is provided as a way of creating patterns that contain binary zeros and other non-print- - ing characters. White space is permitted between pairs of digits. For + ing characters. White space is permitted between pairs of digits. For example, this pattern contains three characters: /ab 32 59/hex - Parts of such a pattern are taken literally if quoted. This pattern - contains nine characters, only two of which are specified in hexadeci- + Parts of such a pattern are taken literally if quoted. This pattern + contains nine characters, only two of which are specified in hexadeci- mal: /ab "literal" 32/hex - Either single or double quotes may be used. There is no way of includ- - ing the delimiter within a substring. The hex and expand modifiers are + Either single or double quotes may be used. There is no way of includ- + ing the delimiter within a substring. The hex and expand modifiers are mutually exclusive. Specifying the pattern's length By default, patterns are passed to the compiling functions as zero-ter- - minated strings but can be passed by length instead of being zero-ter- - minated. The use_length modifier causes this to happen. Using a length - happens automatically (whether or not use_length is set) when hex is - set, because patterns specified in hexadecimal may contain binary ze- + minated strings but can be passed by length instead of being zero-ter- + minated. The use_length modifier causes this to happen. Using a length + happens automatically (whether or not use_length is set) when hex is + set, because patterns specified in hexadecimal may contain binary ze- ros. 
If hex or use_length is used with the POSIX wrapper API (see "Using the - POSIX wrapper API" below), the REG_PEND extension is used to pass the + POSIX wrapper API" below), the REG_PEND extension is used to pass the pattern's length. Specifying a maximum for variable lookbehinds - Variable lookbehind assertions are supported only if, for each one, + Variable lookbehind assertions are supported only if, for each one, there is a maximum length (in characters) that it can match. There is a limit on this, whose default can be set at build time, with an ultimate - default of 255. The max_varlookbehind modifier uses the + default of 255. The max_varlookbehind modifier uses the pcre2_set_max_varlookbehind() function to change the limit. Lookbehinds - whose branches each match a fixed length are limited to 65535 charac- + whose branches each match a fixed length are limited to 65535 charac- ters per branch. Specifying wide characters in 16-bit and 32-bit modes In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 - and translated to UTF-16 or UTF-32 when the utf modifier is set. For + and translated to UTF-16 or UTF-32 when the utf modifier is set. For testing the 16-bit and 32-bit libraries in non-UTF mode, the utf8_input - modifier can be used. It is mutually exclusive with utf. Input lines + modifier can be used. It is mutually exclusive with utf. Input lines are interpreted as UTF-8 as a means of specifying wide characters. More details are given in "Input encoding" above. Generating long repetitive patterns - Some tests use long patterns that are very repetitive. Instead of cre- - ating a very long input line for such a pattern, you can use a special - repetition feature, similar to the one described for subject lines - above. If the expand modifier is present on a pattern, parts of the + Some tests use long patterns that are very repetitive. 
Instead of cre- + ating a very long input line for such a pattern, you can use a special + repetition feature, similar to the one described for subject lines + above. If the expand modifier is present on a pattern, parts of the pattern that have the form \[]{} are expanded before the pattern is passed to pcre2_compile(). For exam- ple, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction - cannot be nested. An initial "\[" sequence is recognized only if "]{" - followed by decimal digits and "}" is found later in the pattern. If + cannot be nested. An initial "\[" sequence is recognized only if "]{" + followed by decimal digits and "}" is found later in the pattern. If not, the characters remain in the pattern unaltered. The expand and hex modifiers are mutually exclusive. - If part of an expanded pattern looks like an expansion, but is really + If part of an expanded pattern looks like an expansion, but is really part of the actual pattern, unwanted expansion can be avoided by giving two values in the quantifier. For example, \[AB]{6000,6000} is not rec- ognized as an expansion item. - If the info modifier is set on an expanded pattern, the result of the + If the info modifier is set on an expanded pattern, the result of the expansion is included in the information that is output. JIT compilation - Just-in-time (JIT) compiling is a heavyweight optimization that can - greatly speed up pattern matching. See the pcre2jit documentation for - details. JIT compiling happens, optionally, after a pattern has been - successfully compiled into an internal form. The JIT compiler converts + Just-in-time (JIT) compiling is a heavyweight optimization that can + greatly speed up pattern matching. See the pcre2jit documentation for + details. JIT compiling happens, optionally, after a pattern has been + successfully compiled into an internal form. The JIT compiler converts this to optimized machine code. 
It needs to know whether the match-time options PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, - because different code is generated for the different cases. See the - partial modifier in "Subject Modifiers" below for details of how these + because different code is generated for the different cases. See the + partial modifier in "Subject Modifiers" below for details of how these options are specified for each match attempt. JIT compilation is requested by the jit pattern modifier, which may op- - tionally be followed by an equals sign and a number in the range 0 to - 7. The three bits that make up the number specify which of the three + tionally be followed by an equals sign and a number in the range 0 to + 7. The three bits that make up the number specify which of the three JIT operating modes are to be compiled: 1 compile JIT code for non-partial matching @@ -845,31 +884,31 @@ PATTERN MODIFIERS 6 soft and hard partial matching only 7 all three modes - If no number is given, 7 is assumed. The phrase "partial matching" + If no number is given, 7 is assumed. The phrase "partial matching" means a call to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the - PCRE2_PARTIAL_HARD option set. Note that such a call may return a com- + PCRE2_PARTIAL_HARD option set. Note that such a call may return a com- plete match; the options enable the possibility of a partial match, but - do not require it. Note also that if you request JIT compilation only - for partial matching (for example, jit=2) but do not set the partial - modifier on a subject line, that match will not use JIT code because + do not require it. Note also that if you request JIT compilation only + for partial matching (for example, jit=2) but do not set the partial + modifier on a subject line, that match will not use JIT code because none was compiled for non-partial matching. 
- If JIT compilation is successful, the compiled JIT code will automati- + If JIT compilation is successful, the compiled JIT code will automati- cally be used when an appropriate type of match is run, except when in- - compatible run-time options are specified. For more details, see the - pcre2jit documentation. See also the jitstack modifier below for a way + compatible run-time options are specified. For more details, see the + pcre2jit documentation. See also the jitstack modifier below for a way of setting the size of the JIT stack. - If the jitfast modifier is specified, matching is done using the JIT - "fast path" interface, pcre2_jit_match(), which skips some of the san- - ity checks that are done by pcre2_match(), and of course does not work - when JIT is not supported. If jitfast is specified without jit, jit=7 + If the jitfast modifier is specified, matching is done using the JIT + "fast path" interface, pcre2_jit_match(), which skips some of the san- + ity checks that are done by pcre2_match(), and of course does not work + when JIT is not supported. If jitfast is specified without jit, jit=7 is assumed. - If the jitverify modifier is specified, information about the compiled - pattern shows whether JIT compilation was or was not successful. If - jitverify is specified without jit, jit=7 is assumed. If JIT compila- - tion is successful when jitverify is set, the text "(JIT)" is added to + If the jitverify modifier is specified, information about the compiled + pattern shows whether JIT compilation was or was not successful. If + jitverify is specified without jit, jit=7 is assumed. If JIT compila- + tion is successful when jitverify is set, the text "(JIT)" is added to the first output line after a match or non match when JIT-compiled code was actually used in the match. 
@@ -880,19 +919,19 @@ PATTERN MODIFIERS /pattern/locale=fr_FR The given locale is set, pcre2_maketables() is called to build a set of - character tables for the locale, and this is then passed to pcre2_com- - pile() when compiling the regular expression. The same tables are used - when matching the following subject lines. The locale modifier applies + character tables for the locale, and this is then passed to pcre2_com- + pile() when compiling the regular expression. The same tables are used + when matching the following subject lines. The locale modifier applies only to the pattern on which it appears, but can be given in a #pattern - command if a default is needed. Setting a locale and alternate charac- + command if a default is needed. Setting a locale and alternate charac- ter tables are mutually exclusive. Showing pattern memory The memory modifier causes the size in bytes of the memory used to hold - the compiled pattern to be output. This does not include the size of - the pcre2_code block; it is just the actual compiled data. If the pat- - tern is subsequently passed to the JIT compiler, the size of the JIT + the compiled pattern to be output. This does not include the size of + the pcre2_code block; it is just the actual compiled data. If the pat- + tern is subsequently passed to the JIT compiler, the size of the JIT compiled code is also output. Here is an example: re> /a(b)c/jit,memory @@ -902,34 +941,34 @@ PATTERN MODIFIERS Limiting nested parentheses - The parens_nest_limit modifier sets a limit on the depth of nested - parentheses in a pattern. Breaching the limit causes a compilation er- - ror. The default for the library is set when PCRE2 is built, but - pcre2test sets its own default of 220, which is required for running + The parens_nest_limit modifier sets a limit on the depth of nested + parentheses in a pattern. Breaching the limit causes a compilation er- + ror. 
The default for the library is set when PCRE2 is built, but + pcre2test sets its own default of 220, which is required for running the standard test suite. Limiting the pattern length - The max_pattern_length modifier sets a limit, in code units, to the + The max_pattern_length modifier sets a limit, in code units, to the length of pattern that pcre2_compile() will accept. Breaching the limit - causes a compilation error. The default is the largest number a + causes a compilation error. The default is the largest number a PCRE2_SIZE variable can hold (essentially unlimited). Limiting the size of a compiled pattern The max_pattern_compiled_length modifier sets a limit, in bytes, to the amount of memory used by a compiled pattern. Breaching the limit causes - a compilation error. The default is the largest number a PCRE2_SIZE + a compilation error. The default is the largest number a PCRE2_SIZE variable can hold (essentially unlimited). Using the POSIX wrapper API - The posix and posix_nosub modifiers cause pcre2test to call PCRE2 via - the POSIX wrapper API rather than its native API. When posix_nosub is - used, the POSIX option REG_NOSUB is passed to regcomp(). The POSIX - wrapper supports only the 8-bit library. Note that it does not imply + The posix and posix_nosub modifiers cause pcre2test to call PCRE2 via + the POSIX wrapper API rather than its native API. When posix_nosub is + used, the POSIX option REG_NOSUB is passed to regcomp(). The POSIX + wrapper supports only the 8-bit library. Note that it does not imply POSIX matching semantics; for more detail see the pcre2posix documenta- - tion. The following pattern modifiers set options for the regcomp() + tion. 
The following pattern modifiers set options for the regcomp() function: caseless REG_ICASE @@ -939,42 +978,42 @@ PATTERN MODIFIERS ucp REG_UCP ) the POSIX standard utf REG_UTF8 ) - The regerror_buffsize modifier specifies a size for the error buffer - that is passed to regerror() in the event of a compilation error. For + The regerror_buffsize modifier specifies a size for the error buffer + that is passed to regerror() in the event of a compilation error. For example: /abc/posix,regerror_buffsize=20 - This provides a means of testing the behaviour of regerror() when the - buffer is too small for the error message. If this modifier has not + This provides a means of testing the behaviour of regerror() when the + buffer is too small for the error message. If this modifier has not been set, a large buffer is used. - The aftertext and allaftertext subject modifiers work as described be- + The aftertext and allaftertext subject modifiers work as described be- low. All other modifiers are either ignored, with a warning message, or cause an error. - The pattern is passed to regcomp() as a zero-terminated string by de- + The pattern is passed to regcomp() as a zero-terminated string by de- fault, but if the use_length or hex modifiers are set, the REG_PEND ex- tension is used to pass it by length. Testing the stack guard feature - The stackguard modifier is used to test the use of pcre2_set_com- - pile_recursion_guard(), a function that is provided to enable stack - availability to be checked during compilation (see the pcre2api docu- - mentation for details). If the number specified by the modifier is + The stackguard modifier is used to test the use of pcre2_set_com- + pile_recursion_guard(), a function that is provided to enable stack + availability to be checked during compilation (see the pcre2api docu- + mentation for details). 
If the number specified by the modifier is greater than zero, pcre2_set_compile_recursion_guard() is called to set - up callback from pcre2_compile() to a local function. The argument it - receives is the current nesting parenthesis depth; if this is greater + up callback from pcre2_compile() to a local function. The argument it + receives is the current nesting parenthesis depth; if this is greater than the value given by the modifier, non-zero is returned, causing the compilation to be aborted. Using alternative character tables - The value specified for the tables modifier must be one of the digits + The value specified for the tables modifier must be one of the digits 0, 1, 2, or 3. It causes a specific set of built-in character tables to - be passed to pcre2_compile(). This is used in the PCRE2 tests to check - behaviour with different character tables. The digit specifies the ta- + be passed to pcre2_compile(). This is used in the PCRE2 tests to check + behaviour with different character tables. The digit specifies the ta- bles as follows: 0 do not pass any special character tables @@ -985,15 +1024,15 @@ PATTERN MODIFIERS In tables 2, some characters whose codes are greater than 128 are iden- tified as letters, digits, spaces, etc. Tables 3 can be used only after - a #loadtables command has loaded them from a binary file. Setting al- + a #loadtables command has loaded them from a binary file. Setting al- ternate character tables and a locale are mutually exclusive. Setting certain match controls The following modifiers are really subject modifiers, and are described - under "Subject Modifiers" below. However, they may be included in a - pattern's modifier list, in which case they are applied to every sub- - ject line that is processed with that pattern. These modifiers do not + under "Subject Modifiers" below. 
However, they may be included in a + pattern's modifier list, in which case they are applied to every sub- + ject line that is processed with that pattern. These modifiers do not affect the compilation process. aftertext show text after match @@ -1009,6 +1048,7 @@ PATTERN MODIFIERS replace= specify a replacement string startchar show starting character when relevant substitute_callout use substitution callouts + substitute_case_callout use substitution case callouts substitute_extended use PCRE2_SUBSTITUTE_EXTENDED substitute_literal use PCRE2_SUBSTITUTE_LITERAL substitute_matched use PCRE2_SUBSTITUTE_MATCHED @@ -1019,39 +1059,39 @@ PATTERN MODIFIERS substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY - These modifiers may not appear in a #pattern command. If you want them + These modifiers may not appear in a #pattern command. If you want them as defaults, set them in a #subject command. Specifying literal subject lines - If the subject_literal modifier is present on a pattern, all the sub- + If the subject_literal modifier is present on a pattern, all the sub- ject lines that it matches are taken as literal strings, with no inter- - pretation of backslashes. It is not possible to set subject modifiers - on such lines, but any that are set as defaults by a #subject command + pretation of backslashes. It is not possible to set subject modifiers + on such lines, but any that are set as defaults by a #subject command are recognized. Saving a compiled pattern - When a pattern with the push modifier is successfully compiled, it is - pushed onto a stack of compiled patterns, and pcre2test expects the - next line to contain a new pattern (or a command) instead of a subject + When a pattern with the push modifier is successfully compiled, it is + pushed onto a stack of compiled patterns, and pcre2test expects the + next line to contain a new pattern (or a command) instead of a subject line. 
This facility is used when saving compiled patterns to a file, as - described in the section entitled "Saving and restoring compiled pat- - terns" below. If pushcopy is used instead of push, a copy of the com- - piled pattern is stacked, leaving the original as current, ready to - match the following input lines. This provides a way of testing the - pcre2_code_copy() function. The push and pushcopy modifiers are in- - compatible with compilation modifiers such as global that act at match + described in the section entitled "Saving and restoring compiled pat- + terns" below. If pushcopy is used instead of push, a copy of the com- + piled pattern is stacked, leaving the original as current, ready to + match the following input lines. This provides a way of testing the + pcre2_code_copy() function. The push and pushcopy modifiers are in- + compatible with compilation modifiers such as global that act at match time. Any that are specified are ignored (for the stacked copy), with a - warning message, except for replace, which causes an error. Note that - jitverify, which is allowed, does not carry through to any subsequent + warning message, except for replace, which causes an error. Note that + jitverify, which is allowed, does not carry through to any subsequent matching that uses a stacked pattern. Testing foreign pattern conversion - The experimental foreign pattern conversion functions in PCRE2 can be - tested by setting the convert modifier. Its argument is a colon-sepa- - rated list of options, which set the equivalent option for the + The experimental foreign pattern conversion functions in PCRE2 can be + tested by setting the convert modifier. Its argument is a colon-sepa- + rated list of options, which set the equivalent option for the pcre2_pattern_convert() function: glob PCRE2_CONVERT_GLOB @@ -1063,19 +1103,19 @@ PATTERN MODIFIERS The "unset" value is useful for turning off a default that has been set by a #pattern command. 
When one of these options is set, the input pat- - tern is passed to pcre2_pattern_convert(). If the conversion is suc- - cessful, the result is reflected in the output and then passed to + tern is passed to pcre2_pattern_convert(). If the conversion is suc- + cessful, the result is reflected in the output and then passed to pcre2_compile(). The normal utf and no_utf_check options, if set, cause - the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be + the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to pcre2_pattern_convert(). By default, the conversion function is allowed to allocate a buffer for - its output. However, if the convert_length modifier is set to a value - greater than zero, pcre2test passes a buffer of the given length. This + its output. However, if the convert_length modifier is set to a value + greater than zero, pcre2test passes a buffer of the given length. This makes it possible to test the length check. - The convert_glob_escape and convert_glob_separator modifiers can be - used to specify the escape and separator characters for glob process- + The convert_glob_escape and convert_glob_separator modifiers can be + used to specify the escape and separator characters for glob process- ing, overriding the defaults, which are operating-system dependent. @@ -1086,10 +1126,11 @@ SUBJECT MODIFIERS Setting match options - The following modifiers set options for pcre2_match() or - pcre2_dfa_match(). See pcreapi for a description of their effects. + The following modifiers set options for pcre2_match() or + pcre2_dfa_match(). See pcre2api for a description of their effects. 
anchored set PCRE2_ANCHORED + copy_matched_subject set PCRE2_COPY_MATCHED_SUBJECT endanchored set PCRE2_ENDANCHORED dfa_restart set PCRE2_DFA_RESTART dfa_shortest set PCRE2_DFA_SHORTEST @@ -1103,42 +1144,42 @@ SUBJECT MODIFIERS partial_hard (or ph) set PCRE2_PARTIAL_HARD partial_soft (or ps) set PCRE2_PARTIAL_SOFT - The partial matching modifiers are provided with abbreviations because + The partial matching modifiers are provided with abbreviations because they appear frequently in tests. - If the posix or posix_nosub modifier was present on the pattern, caus- + If the posix or posix_nosub modifier was present on the pattern, caus- ing the POSIX wrapper API to be used, the only option-setting modifiers that have any effect are notbol, notempty, and noteol, causing REG_NOT- - BOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to + BOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec(). The other modifiers are ignored, with a warning message. - There is one additional modifier that can be used with the POSIX wrap- + There is one additional modifier that can be used with the POSIX wrap- per. It is ignored (with a warning) if used for non-POSIX matching. posix_startend=[:] - This causes the subject string to be passed to regexec() using the - REG_STARTEND option, which uses offsets to specify which part of the - string is searched. If only one number is given, the end offset is - passed as the end of the subject string. For more detail of REG_STAR- - TEND, see the pcre2posix documentation. If the subject string contains - binary zeros (coded as escapes such as \x{00} because pcre2test does + This causes the subject string to be passed to regexec() using the + REG_STARTEND option, which uses offsets to specify which part of the + string is searched. If only one number is given, the end offset is + passed as the end of the subject string. For more detail of REG_STAR- + TEND, see the pcre2posix documentation. 
If the subject string contains + binary zeros (coded as escapes such as \x{00} because pcre2test does not support actual binary zeros in its input), you must use posix_star- tend to specify its length. Setting match controls - The following modifiers affect the matching process or request addi- - tional information. Some of them may also be specified on a pattern - line (see above), in which case they apply to every subject line that - is matched against that pattern, but can be overridden by modifiers on + The following modifiers affect the matching process or request addi- + tional information. Some of them may also be specified on a pattern + line (see above), in which case they apply to every subject line that + is matched against that pattern, but can be overridden by modifiers on the subject. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allvector show the entire ovector allusedtext show all consulted text (non-JIT only) + allvector show the entire ovector altglobal alternative global matching callout_capture show captures at callout time callout_data= set a value to pass via callouts @@ -1172,7 +1213,8 @@ SUBJECT MODIFIERS startchar show startchar when relevant startoffset= same as offset= substitute_callout use substitution callouts - substitute_extedded use PCRE2_SUBSTITUTE_EXTENDED + substitute_case_callout use substitution case callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED substitute_literal use PCRE2_SUBSTITUTE_LITERAL substitute_matched use PCRE2_SUBSTITUTE_MATCHED substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH @@ -1184,29 +1226,29 @@ SUBJECT MODIFIERS zero_terminate pass the subject as zero-terminated The effects of these modifiers are described in the following sections. - When matching via the POSIX wrapper API, the aftertext, allaftertext, - and ovector subject modifiers work as described below. 
All other modi- + When matching via the POSIX wrapper API, the aftertext, allaftertext, + and ovector subject modifiers work as described below. All other modi- fiers are either ignored, with a warning message, or cause an error. Showing more text - The aftertext modifier requests that as well as outputting the part of + The aftertext modifier requests that as well as outputting the part of the subject string that matched the entire pattern, pcre2test should in addition output the remainder of the subject string. This is useful for tests where the subject contains multiple copies of the same substring. - The allaftertext modifier requests the same action for captured sub- + The allaftertext modifier requests the same action for captured sub- strings as well as the main matched substring. In each case the remain- der is output on the following line with a plus character following the capture number. - The allusedtext modifier requests that all the text that was consulted - during a successful pattern match by the interpreter should be shown, - for both full and partial matches. This feature is not supported for - JIT matching, and if requested with JIT it is ignored (with a warning - message). Setting this modifier affects the output if there is a look- - behind at the start of a match, or, for a complete match, a lookahead + The allusedtext modifier requests that all the text that was consulted + during a successful pattern match by the interpreter should be shown, + for both full and partial matches. This feature is not supported for + JIT matching, and if requested with JIT it is ignored (with a warning + message). Setting this modifier affects the output if there is a look- + behind at the start of a match, or, for a complete match, a lookahead at the end, or if \K is used in the pattern. 
Characters that precede or - follow the start and end of the actual match are indicated in the out- + follow the start and end of the actual match are indicated in the out- put by '<' or '>' characters underneath them. Here is an example: re> /(?<=pqr)abc(?=xyz)/ @@ -1217,16 +1259,16 @@ SUBJECT MODIFIERS Partial match: pqrabcxy <<< - The first, complete match shows that the matched string is "abc", with - the preceding and following strings "pqr" and "xyz" having been con- - sulted during the match (when processing the assertions). The partial + The first, complete match shows that the matched string is "abc", with + the preceding and following strings "pqr" and "xyz" having been con- + sulted during the match (when processing the assertions). The partial match can indicate only the preceding string. - The startchar modifier requests that the starting character for the - match be indicated, if it is different to the start of the matched + The startchar modifier requests that the starting character for the + match be indicated, if it is different to the start of the matched string. The only time when this occurs is when \K has been processed as part of the match. In this situation, the output for the matched string - is displayed from the starting character instead of from the match + is displayed from the starting character instead of from the match point, with circumflex characters under the earlier characters. For ex- ample: @@ -1235,7 +1277,7 @@ SUBJECT MODIFIERS 0: abcxyz ^^^ - Unlike allusedtext, the startchar modifier can be used with JIT. How- + Unlike allusedtext, the startchar modifier can be used with JIT. How- ever, these two modifiers are mutually exclusive. Showing the value of all capture groups @@ -1243,104 +1285,104 @@ SUBJECT MODIFIERS The allcaptures modifier requests that the values of all potential cap- tured parentheses be output after a match. 
By default, only those up to the highest one actually used in the match are output (corresponding to - the return code from pcre2_match()). Groups that did not take part in - the match are output as "". This modifier is not relevant for - DFA matching (which does no capturing) and does not apply when replace + the return code from pcre2_match()). Groups that did not take part in + the match are output as "". This modifier is not relevant for + DFA matching (which does no capturing) and does not apply when replace is specified; it is ignored, with a warning message, if present. Showing the entire ovector, for all outcomes The allvector modifier requests that the entire ovector be shown, what- ever the outcome of the match. Compare allcaptures, which shows only up - to the maximum number of capture groups for the pattern, and then only - for a successful complete non-DFA match. This modifier, which acts af- - ter any match result, and also for DFA matching, provides a means of - checking that there are no unexpected modifications to ovector fields. - Before each match attempt, the ovector is filled with a special value, - and if this is found in both elements of a capturing pair, "" is output. After a successful match, this applies to all - groups after the maximum capture group for the pattern. In other cases - it applies to the entire ovector. After a partial match, the first two - elements are the only ones that should be set. After a DFA match, the - amount of ovector that is used depends on the number of matches that + to the maximum number of capture groups for the pattern, and then only + for a successful complete non-DFA match. This modifier, which acts af- + ter any match result, and also for DFA matching, provides a means of + checking that there are no unexpected modifications to ovector fields. + Before each match attempt, the ovector is filled with a special value, + and if this is found in both elements of a capturing pair, "" is output. 
After a successful match, this applies to all + groups after the maximum capture group for the pattern. In other cases + it applies to the entire ovector. After a partial match, the first two + elements are the only ones that should be set. After a DFA match, the + amount of ovector that is used depends on the number of matches that were found. Testing pattern callouts - A callout function is supplied when pcre2test calls the library match- - ing functions, unless callout_none is specified. Its behaviour can be - controlled by various modifiers listed above whose names begin with - callout_. Details are given in the section entitled "Callouts" below. - Testing callouts from pcre2_substitute() is described separately in + A callout function is supplied when pcre2test calls the library match- + ing functions, unless callout_none is specified. Its behaviour can be + controlled by various modifiers listed above whose names begin with + callout_. Details are given in the section entitled "Callouts" below. + Testing callouts from pcre2_substitute() is described separately in "Testing the substitution function" below. Finding all matches in a string Searching for all possible matches within a subject can be requested by - the global or altglobal modifier. After finding a match, the matching - function is called again to search the remainder of the subject. The - difference between global and altglobal is that the former uses the - start_offset argument to pcre2_match() or pcre2_dfa_match() to start - searching at a new point within the entire string (which is what Perl + the global or altglobal modifier. After finding a match, the matching + function is called again to search the remainder of the subject. 
The + difference between global and altglobal is that the former uses the + start_offset argument to pcre2_match() or pcre2_dfa_match() to start + searching at a new point within the entire string (which is what Perl does), whereas the latter passes over a shortened subject. This makes a difference to the matching process if the pattern begins with a lookbe- hind assertion (including \b or \B). - If an empty string is matched, the next match is done with the + If an empty string is matched, the next match is done with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search for another, non-empty, match at the same point in the subject. If this - match fails, the start offset is advanced, and the normal match is re- - tried. This imitates the way Perl handles such cases when using the /g - modifier or the split() function. Normally, the start offset is ad- - vanced by one character, but if the newline convention recognizes CRLF - as a newline, and the current character is CR followed by LF, an ad- + match fails, the start offset is advanced, and the normal match is re- + tried. This imitates the way Perl handles such cases when using the /g + modifier or the split() function. Normally, the start offset is ad- + vanced by one character, but if the newline convention recognizes CRLF + as a newline, and the current character is CR followed by LF, an ad- vance of two characters occurs. Testing substring extraction functions - The copy and get modifiers can be used to test the pcre2_sub- + The copy and get modifiers can be used to test the pcre2_sub- string_copy_xxx() and pcre2_substring_get_xxx() functions. 
They can be given more than once, and each can specify a capture group name or num- ber, for example: abcd\=copy=1,copy=3,get=G1 - If the #subject command is used to set default copy and/or get lists, - these can be unset by specifying a negative number to cancel all num- + If the #subject command is used to set default copy and/or get lists, + these can be unset by specifying a negative number to cancel all num- bered groups and an empty name to cancel all named groups. - The getall modifier tests pcre2_substring_list_get(), which extracts + The getall modifier tests pcre2_substring_list_get(), which extracts all captured substrings. - If the subject line is successfully matched, the substrings extracted - by the convenience functions are output with C, G, or L after the - string number instead of a colon. This is in addition to the normal - full list. The string length (that is, the return from the extraction + If the subject line is successfully matched, the substrings extracted + by the convenience functions are output with C, G, or L after the + string number instead of a colon. This is in addition to the normal + full list. The string length (that is, the return from the extraction function) is given in parentheses after each substring, followed by the name when the extraction was by name. Testing the substitution function - If the replace modifier is set, the pcre2_substitute() function is - called instead of one of the matching functions (or after one call of - pcre2_match() in the case of PCRE2_SUBSTITUTE_MATCHED). Note that re- - placement strings cannot contain commas, because a comma signifies the - end of a modifier. This is not thought to be an issue in a test pro- + If the replace modifier is set, the pcre2_substitute() function is + called instead of one of the matching functions (or after one call of + pcre2_match() in the case of PCRE2_SUBSTITUTE_MATCHED). 
Note that re- + placement strings cannot contain commas, because a comma signifies the + end of a modifier. This is not thought to be an issue in a test pro- gram. - Specifying a completely empty replacement string disables this modi- - fier. However, it is possible to specify an empty replacement by pro- - viding a buffer length, as described below, for an otherwise empty re- + Specifying a completely empty replacement string disables this modi- + fier. However, it is possible to specify an empty replacement by pro- + viding a buffer length, as described below, for an otherwise empty re- placement. - Unlike subject strings, pcre2test does not process replacement strings - for escape sequences. In UTF mode, a replacement string is checked to - see if it is a valid UTF-8 string. If so, it is correctly converted to - a UTF string of the appropriate code unit width. If it is not a valid - UTF-8 string, the individual code units are copied directly. This pro- + Unlike subject strings, pcre2test does not process replacement strings + for escape sequences. In UTF mode, a replacement string is checked to + see if it is a valid UTF-8 string. If so, it is correctly converted to + a UTF string of the appropriate code unit width. If it is not a valid + UTF-8 string, the individual code units are copied directly. This pro- vides a means of passing an invalid UTF-8 string for testing purposes. - The following modifiers set options (in additional to the normal match + The following modifiers set options (in addition to the normal match options) for pcre2_substitute(): global PCRE2_SUBSTITUTE_GLOBAL @@ -1354,8 +1396,8 @@ SUBJECT MODIFIERS See the pcre2api documentation for details of these options. - After a successful substitution, the modified string is output, pre- - ceded by the number of replacements. This may be zero if there were no + After a successful substitution, the modified string is output, pre- + ceded by the number of replacements.
This may be zero if there were no matches. Here is a simple example of a substitution test: /abc/replace=xxx @@ -1364,12 +1406,12 @@ SUBJECT MODIFIERS =abc=abc=\=global 2: =xxx=xxx= - Subject and replacement strings should be kept relatively short (fewer - than 256 characters) for substitution tests, as fixed-size buffers are - used. To make it easy to test for buffer overflow, if the replacement - string starts with a number in square brackets, that number is passed - to pcre2_substitute() as the size of the output buffer, with the re- - placement string starting at the next character. Here is an example + Subject and replacement strings should be kept relatively short (fewer + than 256 characters) for substitution tests, as fixed-size buffers are + used. To make it easy to test for buffer overflow, if the replacement + string starts with a number in square brackets, that number is passed + to pcre2_substitute() as the size of the output buffer, with the re- + placement string starting at the next character. Here is an example that tests the edge case: /abc/ @@ -1379,12 +1421,12 @@ SUBJECT MODIFIERS Failed: error -47: no more memory The default action of pcre2_substitute() is to return PCRE2_ER- - ROR_NOMEMORY when the output buffer is too small. However, if the - PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the substi- + ROR_NOMEMORY when the output buffer is too small. However, if the + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the substi- tute_overflow_length modifier), pcre2_substitute() continues to go - through the motions of matching and substituting (but not doing any - callouts), in order to compute the size of buffer that is required. - When this happens, pcre2test shows the required buffer length (which + through the motions of matching and substituting (but not doing any + callouts), in order to compute the size of buffer that is required. 
+ When this happens, pcre2test shows the required buffer length (which includes space for the trailing zero) as part of the error message. For example: @@ -1393,15 +1435,15 @@ SUBJECT MODIFIERS Failed: error -47: no more memory: 10 code units are needed A replacement string is ignored with POSIX and DFA matching. Specifying - partial matching provokes an error return ("bad option value") from + partial matching provokes an error return ("bad option value") from pcre2_substitute(). Testing substitute callouts If the substitute_callout modifier is set, a substitution callout func- - tion is set up. The null_context modifier must not be set, because the - address of the callout function is passed in a match context. When the - callout function is called (after each substitution), details of the + tion is set up. The null_context modifier must not be set, because the + address of the callout function is passed in a match context. When the + callout function is called (after each substitution), details of the input and output strings are output. For example: /abc/g,replace=<$0>,substitute_callout @@ -1410,19 +1452,19 @@ SUBJECT MODIFIERS 2(1) Old 6 9 "abc" New 8 13 "" 2: defpqr - The first number on each callout line is the count of matches. The + The first number on each callout line is the count of matches. The parenthesized number is the number of pairs that are set in the ovector - (that is, one more than the number of capturing groups that were set). + (that is, one more than the number of capturing groups that were set). Then are listed the offsets of the old substring, its contents, and the same for the replacement. - By default, the substitution callout function returns zero, which ac- - cepts the replacement and causes matching to continue if /g was used. - Two further modifiers can be used to test other return values. 
If sub- - stitute_skip is set to a value greater than zero the callout function - returns +1 for the match of that number, and similarly substitute_stop - returns -1. These cause the replacement to be rejected, and -1 causes - no further matching to take place. If either of them are set, substi- + By default, the substitution callout function returns zero, which ac- + cepts the replacement and causes matching to continue if /g was used. + Two further modifiers can be used to test other return values. If sub- + stitute_skip is set to a value greater than zero the callout function + returns +1 for the match of that number, and similarly substitute_stop + returns -1. These cause the replacement to be rejected, and -1 causes + no further matching to take place. If either of them are set, substi- tute_callout is assumed. For example: /abc/g,replace=<$0>,substitute_skip=1 @@ -1438,6 +1480,18 @@ SUBJECT MODIFIERS gle skip or stop is supported, which is sufficient for testing that the feature works. + Testing substitute case callouts + + If the substitute_case_callout modifier is set, a substitution case + callout function is set up. The callout function is called for each + substituted chunk which is to be case-transformed. + + The callout function passed is a fixed function with implementation for + certain behaviours: inputs which shrink when case-transformed; inputs + which grow; inputs with distinct upper/lower/titlecase forms. The char- + acters which are not special-cased for testing purposes are left unmod- + ified, as if they are caseless characters. + Setting the JIT stack size The jitstack modifier provides a way of setting the maximum stack size @@ -2007,8 +2061,8 @@ AUTHOR REVISION - Last updated: 24 April 2024 + Last updated: 26 December 2024 Copyright (c) 1997-2024 University of Cambridge. 
-PCRE 10.44 24 April 2024 PCRE2TEST(1) +PCRE2 10.45-RC1 26 December 2024 PCRE2TEST(1) diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3 index eb613f4..d245a34 100644 --- a/doc/pcre2unicode.3 +++ b/doc/pcre2unicode.3 @@ -1,6 +1,6 @@ -.TH PCRE2UNICODE 3 "04 February 2023" "PCRE2 10.43" +.TH PCRE2UNICODE 3 "27 November 2024" "PCRE2 10.45-RC1" .SH NAME -PCRE - Perl-compatible regular expressions (revised API) +PCRE2 - Perl-compatible regular expressions (revised API) .SH "UNICODE AND UTF SUPPORT" .rs .sp @@ -43,7 +43,7 @@ When PCRE2 is built with Unicode support, the escape sequences \ep{..}, The Unicode properties that can be tested are a subset of those that Perl supports. Currently they are limited to the general category properties such as Lu for an upper case letter or Nd for a decimal number, the derived properties -Any and LC (synonym L&), the Unicode script names such as Arabic or Han, +Any and Lc (synonym L&), the Unicode script names such as Arabic or Han, Bidi_Class, Bidi_Control, and a few binary properties. .P The full lists are given in the @@ -147,6 +147,35 @@ Recognition of these non-ASCII characters as case-equivalent to their ASCII counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT option. When this is set, all characters in a case equivalence must either be ASCII or non-ASCII; there can be no mixing. +.sp + Without PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' = U+212A (Kelvin sign) + 's' = 'S' = U+017F (long S) + With PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' + U+212A (Kelvin sign) only case-equivalent to itself + 's' = 'S' + U+017F (long S) only case-equivalent to itself +.P +One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. 
+.sp + Without PCRE2_EXTRA_TURKISH_CASING: + 'i' = 'I' + U+0130 (capital I with dot above) only case-equivalent to itself + U+0131 (small dotless i) only case-equivalent to itself + With PCRE2_EXTRA_TURKISH_CASING: + 'i' = U+0130 (capital I with dot above) + U+0131 (small dotless i) = 'I' +.P +It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and +PCRE2_EXTRA_TURKISH_CASING together. +.P +From release 10.45 the Unicode letter properties Lu (upper case), Ll (lower +case), and Lt (title case) are all treated as Lc (cased letter) when caseless +matching is set by the PCRE2_CASELESS option or (?i) within the pattern. . . .\" HTML @@ -480,6 +509,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 12 October 2023 -Copyright (c) 1997-2023 University of Cambridge. +Last updated: 27 November 2024 +Copyright (c) 1997-2024 University of Cambridge. .fi diff --git a/ltmain.sh b/ltmain.sh old mode 100644 new mode 100755 index 51e57e3..977e523 --- a/ltmain.sh +++ b/ltmain.sh @@ -2,11 +2,11 @@ ## DO NOT EDIT - This file generated from ./build-aux/ltmain.in ## by inline-source v2019-02-19.15 -# libtool (GNU libtool) 2.5.0.1-38c1-dirty +# libtool (GNU libtool) 2.4.7 # Provide generalized library-building support services. # Written by Gordon Matzigkeit , 1996 -# Copyright (C) 1996-2019, 2021-2024 Free Software Foundation, Inc. +# Copyright (C) 1996-2019, 2021-2022 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. @@ -31,8 +31,8 @@ PROGRAM=libtool PACKAGE=libtool -VERSION=2.5.0.1-38c1-dirty -package_revision=2.5.0.1 +VERSION="2.4.7 Debian-2.4.7-7build1" +package_revision=2.4.7 ## ------ ## @@ -72,11 +72,11 @@ scriptversion=2019-02-19.15; # UTC # This is free software. There is NO warranty; not even for # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# -# Copyright (C) 2004-2019, 2021, 2023 Bootstrap Authors +# Copyright (C) 2004-2019, 2021 Bootstrap Authors # # This file is dual licensed under the terms of the MIT license -# , and GPL version 2 or later -# . You must apply one of +# , and GPL version 2 or later +# . You must apply one of # these licenses when using or redistributing this software or any of # the files within it. See the URLs above, or the file `LICENSE` # included in the Bootstrap distribution for the full license texts. @@ -143,7 +143,7 @@ nl=' ' IFS="$sp $nl" -# There are apparently some systems that use ';' as a PATH separator! +# There are apparently some retarded systems that use ';' as a PATH separator! if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { @@ -430,7 +430,7 @@ EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. # putting '$debug_cmd' at the start of all your functions, you can get # bash to show function call trace with: # -# debug_cmd='eval echo "${FUNCNAME[0]} $*" >&2' bash your-script-name +# debug_cmd='echo "${FUNCNAME[0]} $*" >&2' bash your-script-name debug_cmd=${debug_cmd-":"} exit_cmd=: @@ -572,27 +572,15 @@ func_require_term_colors () # --------------------- # Append VALUE onto the existing contents of VAR. - # We should try to minimise forks, especially on Windows where they are - # unreasonably slow, so skip the feature probes when bash or zsh are - # being used: - if test set = "${BASH_VERSION+set}${ZSH_VERSION+set}"; then - : ${_G_HAVE_ARITH_OP="yes"} - : ${_G_HAVE_XSI_OPS="yes"} - # The += operator was introduced in bash 3.1 - case $BASH_VERSION in - [12].* | 3.0 | 3.0*) ;; - *) - : ${_G_HAVE_PLUSEQ_OP="yes"} - ;; - esac - fi - # _G_HAVE_PLUSEQ_OP # Can be empty, in which case the shell is probed, "yes" if += is # useable or anything else if it does not work. 
- test -z "$_G_HAVE_PLUSEQ_OP" \ - && (eval 'x=a; x+=" b"; test "a b" = "$x"') 2>/dev/null \ - && _G_HAVE_PLUSEQ_OP=yes + if test -z "$_G_HAVE_PLUSEQ_OP" && \ + __PLUSEQ_TEST="a" && \ + __PLUSEQ_TEST+=" b" 2>/dev/null && \ + test "a b" = "$__PLUSEQ_TEST"; then + _G_HAVE_PLUSEQ_OP=yes + fi if test yes = "$_G_HAVE_PLUSEQ_OP" then @@ -1536,11 +1524,11 @@ func_lt_ver () # This is free software. There is NO warranty; not even for # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# Copyright (C) 2010-2019, 2021, 2023 Bootstrap Authors +# Copyright (C) 2010-2019, 2021 Bootstrap Authors # # This file is dual licensed under the terms of the MIT license -# , and GPL version 2 or later -# . You must apply one of +# , and GPL version 2 or later +# . You must apply one of # these licenses when using or redistributing this software or any of # the files within it. See the URLs above, or the file `LICENSE` # included in the Bootstrap distribution for the full license texts. @@ -1706,6 +1694,8 @@ func_run_hooks () { $debug_cmd + _G_rc_run_hooks=false + case " $hookable_fns " in *" $1 "*) ;; *) func_fatal_error "'$1' does not support hook functions." ;; @@ -2215,7 +2205,7 @@ func_version () # End: # Set a version string. -scriptversion='(GNU libtool) 2.5.0.1-38c1-dirty' +scriptversion='(GNU libtool) 2.4.7' # func_echo ARG... @@ -2306,13 +2296,13 @@ include the following information: compiler: $LTCC compiler flags: $LTCFLAGS linker: $LD (gnu? $with_gnu_ld) - version: $progname (GNU libtool) 2.5.0.1-38c1-dirty + version: $progname $scriptversion Debian-2.4.7-7build1 automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` Report bugs to . -GNU libtool home page: . -General help using GNU software: ." +GNU libtool home page: . +General help using GNU software: ." 
exit 0 } @@ -2510,6 +2500,8 @@ libtool_options_prep () _G_rc_lt_options_prep=: + _G_rc_lt_options_prep=: + # Shorthand for --mode=foo, only valid as the first argument case $1 in clean|clea|cle|cl) @@ -2668,10 +2660,10 @@ libtool_validate_options () # preserve --debug test : = "$debug_cmd" || func_append preserve_args " --debug" - case $host_os in + case $host in # Solaris2 added to fix http://debbugs.gnu.org/cgi/bugreport.cgi?bug=16452 # see also: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59788 - cygwin* | mingw* | windows* | pw32* | cegcc* | solaris2* | os2*) + *cygwin* | *mingw* | *pw32* | *cegcc* | *solaris2* | *os2*) # don't eliminate duplications in $postdeps and $predeps opt_duplicate_compiler_generated_deps=: ;; @@ -3003,7 +2995,7 @@ EOF # func_convert_core_file_wine_to_w32 ARG # Helper function used by file name conversion functions when $build is *nix, -# and $host is mingw, windows, cygwin, or some other w32 environment. Relies on a +# and $host is mingw, cygwin, or some other w32 environment. Relies on a # correctly configured wine environment available, with the winepath program # in $build's $PATH. # @@ -3035,10 +3027,9 @@ func_convert_core_file_wine_to_w32 () # func_convert_core_path_wine_to_w32 ARG # Helper function used by path conversion functions when $build is *nix, and -# $host is mingw, windows, cygwin, or some other w32 environment. Relies on a -# correctly configured wine environment available, with the winepath program -# in $build's $PATH. Assumes ARG has no leading or trailing path separator -# characters. +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly +# configured wine environment available, with the winepath program in $build's +# $PATH. Assumes ARG has no leading or trailing path separator characters. # # ARG is path to be converted from $build format to win32. # Result is available in $func_convert_core_path_wine_to_w32_result. 
@@ -3693,7 +3684,7 @@ func_mode_compile () # On Cygwin there's no "real" PIC flag so we must build both object types case $host_os in - cygwin* | mingw* | windows* | pw32* | os2* | cegcc*) + cygwin* | mingw* | pw32* | os2* | cegcc*) pic_mode=default ;; esac @@ -4570,7 +4561,7 @@ func_mode_install () 'exit $?' tstripme=$stripme case $host_os in - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) case $realname in *.dll.a) tstripme= @@ -4683,7 +4674,7 @@ func_mode_install () # Do a test to see if this is really a libtool program. case $host in - *cygwin* | *mingw* | *windows*) + *cygwin* | *mingw*) if func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" wrapper=$func_ltwrapper_scriptname_result @@ -4911,7 +4902,7 @@ extern \"C\" { $RM $export_symbols eval "$SED -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' case $host in - *cygwin* | *mingw* | *windows* | *cegcc* ) + *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' ;; @@ -4923,7 +4914,7 @@ extern \"C\" { eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' case $host in - *cygwin* | *mingw* | *windows* | *cegcc* ) + *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' ;; @@ -4937,7 +4928,7 @@ extern \"C\" { func_basename "$dlprefile" name=$func_basename_result case $host in - *cygwin* | *mingw* | *windows* | *cegcc* ) + *cygwin* | *mingw* | *cegcc* ) # if an import library, we need to obtain dlname if func_win32_import_lib_p "$dlprefile"; then func_tr_sh "$dlprefile" @@ -5112,7 +5103,7 @@ static const void *lt_preloaded_setup() { # Transform the symbol file into the correct name. 
symfileobj=$output_objdir/${my_outputname}S.$objext case $host in - *cygwin* | *mingw* | *windows* | *cegcc* ) + *cygwin* | *mingw* | *cegcc* ) if test -f "$output_objdir/$my_outputname.def"; then compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` @@ -5455,7 +5446,7 @@ func_extract_archives () # # Emit a libtool wrapper script on stdout. # Don't directly open a file because we may want to -# incorporate the script contents within a cygwin/mingw/windows +# incorporate the script contents within a cygwin/mingw # wrapper executable. Must ONLY be called from within # func_mode_link because it depends on a number of variables # set therein. @@ -5463,7 +5454,7 @@ func_extract_archives () # ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR # variable will take. If 'yes', then the emitted script # will assume that the directory where it is stored is -# the $objdir directory. This is a cygwin/mingw/windows-specific +# the $objdir directory. This is a cygwin/mingw-specific # behavior. func_emit_wrapper () { @@ -5588,7 +5579,7 @@ func_exec_program_core () " case $host in # Backslashes separate directories on plain windows - *-*-mingw* | *-*-windows* | *-*-os2* | *-cegcc*) + *-*-mingw | *-*-os2* | *-cegcc*) $ECHO "\ if test -n \"\$lt_option_debug\"; then \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir\\\\\$program\" 1>&2 @@ -5656,7 +5647,7 @@ func_exec_program () file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` done - # Usually 'no', except on cygwin/mingw/windows when embedded into + # Usually 'no', except on cygwin/mingw when embedded into # the cwrapper. 
WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then @@ -5788,7 +5779,7 @@ EOF #endif #include #include -#if defined _WIN32 && !defined __GNUC__ +#ifdef _MSC_VER # include # include # include @@ -5813,7 +5804,7 @@ EOF /* declarations of non-ANSI functions */ #if defined __MINGW32__ # ifdef __STRICT_ANSI__ -_CRTIMP int __cdecl _putenv (const char *); +int _putenv (const char *); # endif #elif defined __CYGWIN__ # ifdef __STRICT_ANSI__ @@ -6011,7 +6002,7 @@ main (int argc, char *argv[]) { EOF case $host in - *mingw* | *windows* | *cygwin* ) + *mingw* | *cygwin* ) # make stdout use "unix" line endings echo " setmode(1,_O_BINARY);" ;; @@ -6030,7 +6021,7 @@ EOF { /* however, if there is an option in the LTWRAPPER_OPTION_PREFIX namespace, but it is not one of the ones we know about and - have already dealt with, above (including dump-script), then + have already dealt with, above (inluding dump-script), then report an error. Otherwise, targets might begin to believe they are allowed to use options in the LTWRAPPER_OPTION_PREFIX namespace. The first time any user complains about this, we'll @@ -6114,7 +6105,7 @@ EOF EOF case $host_os in - mingw* | windows*) + mingw*) cat <<"EOF" { char* p; @@ -6156,7 +6147,7 @@ EOF EOF case $host_os in - mingw* | windows*) + mingw*) cat <<"EOF" /* execv doesn't actually work on mingw as expected on unix */ newargz = prepare_spawn (newargz); @@ -6575,7 +6566,7 @@ lt_update_lib_path (const char *name, const char *value) EOF case $host_os in - mingw* | windows*) + mingw*) cat <<"EOF" /* Prepares an argument vector before calling spawn(). 
@@ -6750,7 +6741,7 @@ func_mode_link () $debug_cmd case $host in - *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-cegcc*) + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) # It is impossible to link a dll without this setting, and # we shouldn't force the makefile maintainer to figure out # what system we are compiling for in order to pass an extra @@ -7256,7 +7247,7 @@ func_mode_link () ;; esac case $host in - *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-cegcc*) + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` case :$dllsearchpath: in *":$dir:"*) ;; @@ -7276,7 +7267,7 @@ func_mode_link () -l*) if test X-lc = "X$arg" || test X-lm = "X$arg"; then case $host in - *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) # These systems don't actually have a C or math library (as such) continue ;; @@ -7284,7 +7275,7 @@ func_mode_link () # These systems don't actually have a C library (as such) test X-lc = "X$arg" && continue ;; - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) # Do not include libc due to us having libc/libc_r. test X-lc = "X$arg" && continue ;; @@ -7304,7 +7295,7 @@ func_mode_link () esac elif test X-lc_r = "X$arg"; then case $host in - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) # Do not include libc_r directly, use -pthread flag. 
continue ;; @@ -7348,7 +7339,7 @@ func_mode_link () continue ;; -mt|-mthreads|-kthread|-Kthread|-pthreads|--thread-safe \ - |-threads|-fopenmp|-fopenmp=*|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) func_append compiler_flags " $arg" func_append compile_command " $arg" func_append finalize_command " $arg" @@ -7371,7 +7362,7 @@ func_mode_link () -no-install) case $host in - *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) # The PATH hackery in wrapper scripts is required on Windows # and Darwin in order for the loader to find any dlls it needs. func_warning "'-no-install' is ignored for $host" @@ -7556,27 +7547,15 @@ func_mode_link () # -O*, -g*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization # -specs=* GCC specs files # -stdlib=* select c++ std lib with clang - # -fdiagnostics-color* simply affects output - # -frecord-gcc-switches used to verify flags were respected # -fsanitize=* Clang/GCC memory and address sanitizer - # -fno-sanitize* Clang/GCC memory and address sanitizer - # -shared-libsan Link with shared sanitizer runtimes (Clang) - # -static-libsan Link with static sanitizer runtimes (Clang) - # -no-canonical-prefixes Do not expand any symbolic links # -fuse-ld=* Linker select flags for GCC - # -rtlib=* select c runtime lib with clang - # --unwindlib=* select unwinder library with clang - # -f{file|debug|macro|profile}-prefix-map=* needed for lto linking + # -static-* direct GCC to link specific libraries statically + # -fcilkplus Cilk Plus language extension features for C/C++ # -Wa,* Pass flags directly to the assembler - # -Werror, -Werror=* Report (specified) warnings as errors -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ - 
-O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-no-canonical-prefixes| \ - -stdlib=*|-rtlib=*|--unwindlib=*| \ - -specs=*|-fsanitize=*|-fno-sanitize*|-shared-libsan|-static-libsan| \ - -ffile-prefix-map=*|-fdebug-prefix-map=*|-fmacro-prefix-map=*|-fprofile-prefix-map=*| \ - -fdiagnostics-color*|-frecord-gcc-switches| \ - -fuse-ld=*|-Wa,*|-Werror|-Werror=*) + -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-stdlib=*| \ + -specs=*|-fsanitize=*|-fuse-ld=*|-static-*|-fcilkplus|-Wa,*) func_quote_arg pretty "$arg" arg=$func_quote_arg_result func_append compile_command " $arg" @@ -7869,7 +7848,10 @@ func_mode_link () case $pass in dlopen) libs=$dlfiles ;; dlpreopen) libs=$dlprefiles ;; - link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + link) + libs="$deplibs %DEPLIBS%" + test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs" + ;; esac fi if test lib,dlpreopen = "$linkmode,$pass"; then @@ -7906,7 +7888,7 @@ func_mode_link () found=false case $deplib in -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ - |-threads|-fopenmp|-fopenmp=*|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) if test prog,link = "$linkmode,$pass"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" @@ -8083,15 +8065,18 @@ func_mode_link () ;; esac if $valid_a_lib; then - func_warning "Linking the shared library $output against the static library $deplib is not portable!" + echo + $ECHO "*** Warning: Linking the shared library $output against the" + $ECHO "*** static library $deplib is not portable!" deplibs="$deplib $deplibs" else - func_warning "Trying to link with static lib archive $deplib." - func_warning "I have the capability to make that library automatically link in when" - func_warning "you link to this library. 
But I can only do this if you have a" - func_warning "shared version of the library, which you do not appear to have" - func_warning "because the file extensions .$libext of this argument makes me believe" - func_warning "that it is just a static archive that I should not use here." + echo + $ECHO "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not use here." fi ;; esac @@ -8185,19 +8170,19 @@ func_mode_link () # It is a libtool convenience library, so add in its objects. func_append convenience " $ladir/$objdir/$old_library" func_append old_convenience " $ladir/$objdir/$old_library" + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done elif test prog != "$linkmode" && test lib != "$linkmode"; then func_fatal_error "'$lib' is not a convenience library" fi - tmp_libs= - for deplib in $dependency_libs; do - deplibs="$deplib $deplibs" - if $opt_preserve_dup_deps; then - case "$tmp_libs " in - *" $deplib "*) func_append specialdeplibs " $deplib" ;; - esac - fi - func_append tmp_libs " $deplib" - done continue fi # $pass = conv @@ -8286,7 +8271,7 @@ func_mode_link () fi case $host in # special handling for platforms with PE-DLLs. - *cygwin* | *mingw* | *windows* | *cegcc* ) + *cygwin* | *mingw* | *cegcc* ) # Linker will automatically link against shared library if both # static and shared are present. 
Therefore, ensure we extract # symbols from the import library if a shared library is present @@ -8429,8 +8414,8 @@ func_mode_link () fi if test -n "$library_names" && { test no = "$use_static_libs" || test -z "$old_library"; }; then - case $host_os in - cygwin* | mingw* | windows* | cegcc* | os2*) + case $host in + *cygwin* | *mingw* | *cegcc* | *os2*) # No point in relinking DLLs because paths are not encoded func_append notinst_deplibs " $lib" need_relink=no @@ -8456,11 +8441,11 @@ func_mode_link () if test -z "$dlopenmodule" && test yes = "$shouldnotlink" && test link = "$pass"; then echo if test prog = "$linkmode"; then - func_warning "Linking the executable $output against the loadable module" + $ECHO "*** Warning: Linking the executable $output against the loadable module" else - func_warning "Linking the shared library $output against the loadable module" + $ECHO "*** Warning: Linking the shared library $output against the loadable module" fi - func_warning "$linklib is not portable!" + $ECHO "*** $linklib is not portable!" 
fi if test lib = "$linkmode" && test yes = "$hardcode_into_libs"; then @@ -8499,8 +8484,8 @@ func_mode_link () soname=$dlname elif test -n "$soname_spec"; then # bleh windows - case $host_os in - cygwin* | mingw* | windows* | cegcc* | os2*) + case $host in + *cygwin* | mingw* | *cegcc* | *os2*) func_arith $current - $age major=$func_arith_result versuffix=-$major @@ -8555,10 +8540,11 @@ func_mode_link () if /usr/bin/file -L $add 2> /dev/null | $GREP ": [^:]* bundle" >/dev/null; then if test "X$dlopenmodule" != "X$lib"; then - func_warning "lib $linklib is a module, not a shared library" + $ECHO "*** Warning: lib $linklib is a module, not a shared library" if test -z "$old_library"; then - func_warning "And there doesn't seem to be a static archive available" - func_warning "The link will probably fail, sorry" + echo + echo "*** And there doesn't seem to be a static archive available" + echo "*** The link will probably fail, sorry" else add=$dir/$old_library fi @@ -8698,19 +8684,21 @@ func_mode_link () # Just print a warning and add the library to dependency_libs so # that the program can be linked against the static library. - func_warning "This system cannot link to static lib archive $lib." - func_warning "I have the capability to make that library automatically link in when" - func_warning "you link to this library. But I can only do this if you have a" - func_warning "shared version of the library, which you do not appear to have." + echo + $ECHO "*** Warning: This system cannot link to static lib archive $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." 
if test yes = "$module"; then - func_warning "But as you try to build a module library, libtool will still create " - func_warning "a static module, that should work as long as the dlopening application" - func_warning "is linked with the -dlopen flag to resolve symbols at runtime." + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." if test -z "$global_symbol_pipe"; then - func_warning "However, this would only work if libtool was able to extract symbol" - func_warning "lists from a program, using 'nm' or equivalent, but libtool could" - func_warning "not find such a program. So, this module is probably useless." - func_warning "'nm' from GNU binutils and a full rebuild may help." + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using 'nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** 'nm' from GNU binutils and a full rebuild may help." fi if test no = "$build_old_libs"; then build_libtool_libs=module @@ -9037,7 +9025,9 @@ func_mode_link () if test pass_all != "$deplibs_check_method"; then func_fatal_error "cannot build libtool library '$output' from non-libtool objects on this host:$objs" else - func_warning "Linking the shared library $output against the non-libtool objects $objs is not portable!" + echo + $ECHO "*** Warning: Linking the shared library $output against the non-libtool" + $ECHO "*** objects $objs is not portable!" 
func_append libobjs " $objs" fi fi @@ -9098,13 +9088,13 @@ func_mode_link () # case $version_type in # correct linux to gnu/linux during the next big refactor - darwin|freebsd-elf|linux|midnightbsd-elf|osf|qnx|windows|none) + darwin|freebsd-elf|linux|midnightbsd-elf|osf|windows|none) func_arith $number_major + $number_minor current=$func_arith_result age=$number_minor revision=$number_revision ;; - freebsd-aout|sco|sunos) + freebsd-aout|qnx|sunos) current=$number_major revision=$number_minor age=0 @@ -9116,6 +9106,9 @@ func_mode_link () revision=$number_minor lt_irix_increment=no ;; + *) + func_fatal_configuration "$modename: unknown library version type '$version_type'" + ;; esac ;; no) @@ -9251,9 +9244,8 @@ func_mode_link () ;; qnx) - func_arith $current - $age - major=.$func_arith_result - versuffix=$major.$age.$revision + major=.$current + versuffix=.$current ;; sco) @@ -9406,7 +9398,7 @@ func_mode_link () if test yes = "$build_libtool_libs"; then if test -n "$rpath"; then case $host in - *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) # these systems don't actually have a c library (as such)! ;; *-*-rhapsody* | *-*-darwin1.[012]) @@ -9457,6 +9449,108 @@ func_mode_link () # implementing what was already the behavior. newdeplibs=$deplibs ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $opt_dry_run || $RM conftest.c + cat > conftest.c <. 
]) -# serial 61 LT_INIT +# serial 59 LT_INIT # LT_PREREQ(VERSION) @@ -616,7 +616,7 @@ m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT # LT_OUTPUT # --------- # This macro allows early generation of the libtool script (before -# AC_OUTPUT is called), in case it is used in configure for compilation +# AC_OUTPUT is called), incase it is used in configure for compilation # tests. AC_DEFUN([LT_OUTPUT], [: ${CONFIG_LT=./config.lt} @@ -651,9 +651,9 @@ m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) configured by $[0], generated by m4_PACKAGE_STRING. -Copyright (C) 2024 Free Software Foundation, Inc. +Copyright (C) 2011 Free Software Foundation, Inc. This config.lt script is free software; the Free Software Foundation -gives unlimited permission to copy, distribute and modify it." +gives unlimited permision to copy, distribute and modify it." while test 0 != $[#] do @@ -1255,9 +1255,7 @@ lt_sysroot= case $with_sysroot in #( yes) if test yes = "$GCC"; then - # Trim trailing / since we'll always append absolute paths and we want - # to avoid //, if only for less confusing output for the user. - lt_sysroot=`$CC --print-sysroot 2>/dev/null | $SED 's:/\+$::'` + lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) @@ -1369,7 +1367,7 @@ mips64*-*linux*) ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ -s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. 
Note that the listed cases only cover the # situations where additional linker options are needed (such as when @@ -1384,7 +1382,7 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; - x86_64-*linux*|x86_64-gnu*) + x86_64-*linux*) case `$FILECMD conftest.o` in *x86-64*) LD="${LD-ld} -m elf32_x86_64" @@ -1413,7 +1411,7 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; - x86_64-*linux*|x86_64-gnu*) + x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; powerpcle-*linux*) @@ -1496,7 +1494,7 @@ _LT_DECL([], [AR], [1], [The archiver]) # Use ARFLAGS variable as AR's operation code to sync the variable naming with # Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have -# higher priority because that's what people were doing historically (setting +# higher priority because thats what people were doing historically (setting # ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS # variable obsoleted/removed. @@ -1557,8 +1555,15 @@ old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then + case $host_os in + bitrig* | openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" - old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" fi case $host_os in @@ -1697,7 +1702,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl lt_cv_sys_max_cmd_len=-1; ;; - cygwin* | mingw* | windows* | cegcc*) + cygwin* | mingw* | cegcc*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. 
# Worse, since 9x/ME are not pre-emptively multitasking, @@ -1719,7 +1724,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl lt_cv_sys_max_cmd_len=8192; ;; - darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` @@ -1940,7 +1945,7 @@ else lt_cv_dlopen_self=yes ;; - mingw* | windows* | pw32* | cegcc*) + mingw* | pw32* | cegcc*) lt_cv_dlopen=LoadLibrary lt_cv_dlopen_libs= ;; @@ -2308,7 +2313,7 @@ if test yes = "$GCC"; then *) lt_awk_arg='/^libraries:/' ;; esac case $host_os in - mingw* | windows* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;; + mingw* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;; *) lt_sed_strip_eq='s|=/|/|g' ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` @@ -2366,7 +2371,7 @@ BEGIN {RS = " "; FS = "/|\n";} { # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. 
case $host_os in - mingw* | windows* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's|/\([[A-Za-z]]:\)|\1|g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` @@ -2535,7 +2540,7 @@ bsdi[[45]]*) # libtool to hard-code these into programs ;; -cygwin* | mingw* | windows* | pw32* | cegcc*) +cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=.dll need_version=no @@ -2567,7 +2572,7 @@ cygwin* | mingw* | windows* | pw32* | cegcc*) m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) ;; - mingw* | windows* | cegcc*) + mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' ;; @@ -2586,7 +2591,7 @@ m4_if([$1], [],[ library_names_spec='$libname.dll.lib' case $build_os in - mingw* | windows*) + mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' @@ -2834,7 +2839,7 @@ linux*android*) version_type=none # Android doesn't support versioned libraries. need_lib_prefix=no need_version=no - library_names_spec='$libname$release$shared_ext $libname$shared_ext' + library_names_spec='$libname$release$shared_ext' soname_spec='$libname$release$shared_ext' finish_cmds= shlibpath_var=LD_LIBRARY_PATH @@ -2846,9 +2851,8 @@ linux*android*) hardcode_into_libs=yes dynamic_linker='Android linker' - # -rpath works at least for libraries that are not overridden by - # libraries installed in system locations. - _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + # Don't embed -rpath directories since the linker doesn't support them. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' ;; # This must be glibc/ELF. @@ -2882,7 +2886,7 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) # before this can be enabled. 
hardcode_into_libs=yes - # Ideally, we could use ldconfig to report *all* directories which are + # Ideally, we could use ldconfig to report *all* directores which are # searched for libraries, however this is still not possible. Aside from not # being certain /sbin/ldconfig is available, command # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, @@ -2902,6 +2906,18 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) dynamic_linker='GNU/Linux ld.so' ;; +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + netbsd*) version_type=sunos need_lib_prefix=no @@ -2939,7 +2955,7 @@ newsos6) dynamic_linker='ldqnx.so' ;; -openbsd*) +openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec=/usr/lib need_lib_prefix=no @@ -3271,7 +3287,7 @@ if test yes = "$GCC"; then # Check if gcc -print-prog-name=ld gives a path. AC_MSG_CHECKING([for ld used by $CC]) case $host in - *-*-mingw* | *-*-windows*) + *-*-mingw*) # gcc leaves a trailing carriage return, which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) @@ -3380,7 +3396,7 @@ case $reload_flag in esac reload_cmds='$LD$reload_flag -o $output$reload_objs' case $host_os in - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) if test yes != "$GCC"; then reload_cmds=false fi @@ -3452,6 +3468,7 @@ lt_cv_deplibs_check_method='unknown' # 'none' -- dependencies not supported. # 'unknown' -- same as none, but documents that we really don't know. # 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. 
# 'file_magic [[regex]]' -- check by looking for files in library path # that responds to the $file_magic_cmd with a given extended regex. # If you have 'file' or equivalent on your system and you're not sure @@ -3478,7 +3495,7 @@ cygwin*) lt_cv_file_magic_cmd='func_win32_libid' ;; -mingw* | windows* | pw32*) +mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by # func_win32_libid shell function, so use a weaker test based on 'objdump', # unless we find 'file', for example because we are cross-compiling. @@ -3560,7 +3577,7 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) lt_cv_deplibs_check_method=pass_all ;; -netbsd*) +netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' else @@ -3578,7 +3595,7 @@ newos6*) lt_cv_deplibs_check_method=pass_all ;; -openbsd*) +openbsd* | bitrig*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' else @@ -3642,7 +3659,7 @@ file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in - mingw* | windows* | pw32*) + mingw* | pw32*) if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then want_nocaseglob=yes else @@ -3694,7 +3711,7 @@ else # Tru64's nm complains that /dev/null is an invalid object file # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty case $build_os in - mingw* | windows*) lt_bad_file=conftest.nm/nofile ;; + mingw*) lt_bad_file=conftest.nm/nofile ;; *) lt_bad_file=/dev/null ;; esac case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in @@ -3785,7 +3802,7 @@ lt_cv_sharedlib_from_linklib_cmd, [lt_cv_sharedlib_from_linklib_cmd='unknown' case $host_os in -cygwin* | mingw* | windows* | pw32* | cegcc*) +cygwin* | mingw* | pw32* | cegcc*) # two different shell functions defined in ltmain.sh; # decide which one to use based on capabilities of 
$DLLTOOL case `$DLLTOOL --help 2>&1` in @@ -3817,16 +3834,16 @@ _LT_DECL([], [sharedlib_from_linklib_cmd], [1], m4_defun([_LT_PATH_MANIFEST_TOOL], [AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt -AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_manifest_tool], - [lt_cv_path_manifest_tool=no +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], + [lt_cv_path_mainfest_tool=no echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out cat conftest.err >&AS_MESSAGE_LOG_FD if $GREP 'Manifest Tool' conftest.out > /dev/null; then - lt_cv_path_manifest_tool=yes + lt_cv_path_mainfest_tool=yes fi rm -f conftest*]) -if test yes != "$lt_cv_path_manifest_tool"; then +if test yes != "$lt_cv_path_mainfest_tool"; then MANIFEST_TOOL=: fi _LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl @@ -3855,7 +3872,7 @@ AC_DEFUN([LT_LIB_M], [AC_REQUIRE([AC_CANONICAL_HOST])dnl LIBM= case $host in -*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-mingw* | *-*-pw32* | *-*-darwin*) +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) # These system don't have libm, or don't need it ;; *-ncr-sysv4.3*) @@ -3930,7 +3947,7 @@ case $host_os in aix*) symcode='[[BCDT]]' ;; -cygwin* | mingw* | windows* | pw32* | cegcc*) +cygwin* | mingw* | pw32* | cegcc*) symcode='[[ABCDGISTW]]' ;; hpux*) @@ -3945,7 +3962,7 @@ osf*) symcode='[[BCDEGQRST]]' ;; solaris*) - symcode='[[BCDRT]]' + symcode='[[BDRT]]' ;; sco3.2v5*) symcode='[[DT]]' @@ -4009,7 +4026,7 @@ $lt_c_name_lib_hook\ # Handle CRLF in mingw tool chain opt_cr= case $build_os in -mingw* | windows*) +mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac @@ -4066,7 +4083,8 @@ _LT_EOF if AC_TRY_EVAL(ac_compile); then # Now try to grab the symbols. 
nlist=conftest.nm - if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&AS_MESSAGE_LOG_FD + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&AS_MESSAGE_LOG_FD && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" @@ -4236,7 +4254,7 @@ m4_if([$1], [CXX], [ beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; - mingw* | windows* | cygwin* | os2* | pw32* | cegcc*) + mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style @@ -4312,7 +4330,7 @@ m4_if([$1], [CXX], [ ;; esac ;; - mingw* | windows* | cygwin* | os2* | pw32* | cegcc*) + mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). m4_if([$1], [GCJ], [], @@ -4438,7 +4456,7 @@ m4_if([$1], [CXX], [ ;; esac ;; - netbsd*) + netbsd* | netbsdelf*-gnu) ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise @@ -4560,7 +4578,7 @@ m4_if([$1], [CXX], [ # PIC is the default for these OSes. ;; - mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). 
# Although the cygwin gcc ignores -fPIC, still need this for old-style @@ -4664,7 +4682,7 @@ m4_if([$1], [CXX], [ esac ;; - mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). m4_if([$1], [GCJ], [], @@ -4706,8 +4724,8 @@ m4_if([$1], [CXX], [ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; - *flang) - # Flang compiler. + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' @@ -4945,7 +4963,7 @@ m4_if([$1], [CXX], [ pw32*) _LT_TAGVAR(export_symbols_cmds, $1)=$ltdll_cmds ;; - cygwin* | mingw* | windows* | cegcc*) + cygwin* | mingw* | cegcc*) case $cc_basename in cl* | icl*) _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' @@ -4956,6 +4974,9 @@ m4_if([$1], [CXX], [ ;; esac ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; *) _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; @@ -5003,7 +5024,7 @@ dnl Note also adjust exclude_expsyms for C++ above. extract_expsyms_cmds= case $host_os in - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. @@ -5015,9 +5036,12 @@ dnl Note also adjust exclude_expsyms for C++ above. 
# we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; - openbsd*) + openbsd* | bitrig*) with_gnu_ld=no ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; esac _LT_TAGVAR(ld_shlibs, $1)=yes @@ -5118,7 +5142,7 @@ _LT_EOF fi ;; - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' @@ -5174,7 +5198,7 @@ _LT_EOF cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' ;; @@ -5253,6 +5277,7 @@ _LT_EOF case $cc_basename in tcc*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) @@ -5273,7 +5298,7 @@ _LT_EOF fi ;; - netbsd*) + netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= @@ -5575,7 +5600,7 @@ _LT_EOF _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic ;; - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is @@ -5592,14 +5617,14 @@ _LT_EOF # Tell ltmain to make .dll files, not .so files. 
shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. - _LT_TAGVAR(archive_cmds, $1)='$CC -Fe $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ - $CC -Fe $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. 
# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' @@ -5794,6 +5819,7 @@ _LT_EOF if test yes = "$lt_cv_irix_exported_symbol"; then _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' fi + _LT_TAGVAR(link_all_deplibs, $1)=no else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' @@ -5811,11 +5837,12 @@ _LT_EOF # Fabrice Bellard et al's Tiny C Compiler _LT_TAGVAR(ld_shlibs, $1)=yes _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' ;; esac ;; - netbsd*) + netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else @@ -5837,7 +5864,7 @@ _LT_EOF *nto* | *qnx*) ;; - openbsd*) + openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no @@ -5880,7 +5907,7 @@ _LT_EOF cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, 
$1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' ;; @@ -6174,7 +6201,7 @@ _LT_TAGDECL([], [hardcode_direct], [0], _LT_TAGDECL([], [hardcode_direct_absolute], [0], [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes DIR into the resulting binary and the resulting library dependency is - "absolute", i.e. impossible to change by setting $shlibpath_var if the + "absolute", i.e impossible to change by setting $shlibpath_var if the library is relocated]) _LT_TAGDECL([], [hardcode_minus_L], [0], [Set to "yes" if using the -LDIR flag during linking hardcodes DIR @@ -6421,7 +6448,8 @@ if test yes != "$_lt_caught_CXX_error"; then wlarc='$wl' # ancient GNU ld didn't support --whole-archive et. al. - if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' else _LT_TAGVAR(whole_archive_flag_spec, $1)= @@ -6441,7 +6469,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else GXX=no @@ -6650,7 +6678,7 @@ if test yes != "$_lt_caught_CXX_error"; then esac ;; - cygwin* | mingw* | windows* | pw32* | cegcc*) + cygwin* | mingw* | pw32* | cegcc*) case $GXX,$cc_basename in ,cl* | no,cl* | ,icl* | no,icl*) # Native MSVC or ICC @@ -6749,7 +6777,7 @@ if test yes != "$_lt_caught_CXX_error"; then cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' - _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' ;; @@ -6817,7 +6845,7 @@ if test yes != "$_lt_caught_CXX_error"; then # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "[[-]]L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then @@ -6882,7 +6910,7 @@ if test yes != "$_lt_caught_CXX_error"; then # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. - output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "[[-]]L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then @@ -7130,7 +7158,7 @@ if test yes != "$_lt_caught_CXX_error"; then _LT_TAGVAR(ld_shlibs, $1)=yes ;; - openbsd*) + openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no @@ -7221,7 +7249,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # FIXME: insert proper C++ library support @@ -7305,7 +7333,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. - output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # g++ 2.7 appears to require '-G' NOT '-shared' on this # platform. @@ -7316,7 +7344,7 @@ if test yes != "$_lt_caught_CXX_error"; then # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. - output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' @@ -7554,11 +7582,10 @@ if AC_TRY_EVAL(ac_compile); then case $prev$p in -L* | -R* | -l*) - # Some compilers place space between "-{L,R,l}" and the path. + # Some compilers place space between "-{L,R}" and the path. # Remove the space. 
- if test x-L = x"$p" || - test x-R = x"$p" || - test x-l = x"$p"; then + if test x-L = "$p" || + test x-R = "$p"; then prev=$p continue fi @@ -8216,7 +8243,7 @@ AC_SUBST([DLLTOOL]) # ---------------- # Check for a file(cmd) program that can be used to detect file type and magic m4_defun([_LT_DECL_FILECMD], -[AC_CHECK_PROG([FILECMD], [file], [:]) +[AC_CHECK_TOOL([FILECMD], [file], [:]) _LT_DECL([], [FILECMD], [1], [A file(cmd) program that detects file types]) ])# _LD_DECL_FILECMD @@ -8232,6 +8259,73 @@ _LT_DECL([], [SED], [1], [A sed program that does not truncate output]) _LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], [Sed that helps us avoid accidentally triggering echo(1) options like -n]) ])# _LT_DECL_SED + +m4_ifndef([AC_PROG_SED], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. # +############################################################ + +m4_defun([AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for lt_ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then + lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" + fi + done + done +done +IFS=$as_save_IFS +lt_ac_max=0 +lt_ac_count=0 +# Add /usr/xpg4/bin/sed as it is typically found on Solaris +# along with /bin/sed that truncates output. +for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do + test ! 
-f "$lt_ac_sed" && continue + cat /dev/null > conftest.in + lt_ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >conftest.in + # Check for GNU sed and select it if it is found. + if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # 10000 chars as input seems more than enough + test 10 -lt "$lt_ac_count" && break + lt_ac_count=`expr $lt_ac_count + 1` + if test "$lt_ac_count" -gt "$lt_ac_max"; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_SUBST([SED]) +AC_MSG_RESULT([$SED]) +])#AC_PROG_SED +])#m4_ifndef + +# Old name: +AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_SED], []) @@ -8278,7 +8372,7 @@ AC_CACHE_VAL(lt_cv_to_host_file_cmd, [case $host in *-*-mingw* ) case $build in - *-*-mingw* | *-*-windows* ) # actually msys + *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) @@ -8291,7 +8385,7 @@ AC_CACHE_VAL(lt_cv_to_host_file_cmd, ;; *-*-cygwin* ) case $build in - *-*-mingw* | *-*-windows* ) # actually msys + *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) @@ -8317,9 +8411,9 @@ AC_CACHE_VAL(lt_cv_to_tool_file_cmd, [#assume ordinary cross tools, or native build. 
lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in - *-*-mingw* | *-*-windows* ) + *-*-mingw* ) case $build in - *-*-mingw* | *-*-windows* ) # actually msys + *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4 index 6dfe99f..b0b5e9c 100644 --- a/m4/ltoptions.m4 +++ b/m4/ltoptions.m4 @@ -1,6 +1,6 @@ # Helper functions for option handling. -*- Autoconf -*- # -# Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021-2024 Free +# Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021-2022 Free # Software Foundation, Inc. # Written by Gary V. Vaughan, 2004 # @@ -8,7 +8,7 @@ # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. -# serial 9 ltoptions.m4 +# serial 8 ltoptions.m4 # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) @@ -128,7 +128,7 @@ LT_OPTION_DEFINE([LT_INIT], [win32-dll], [enable_win32_dll=yes case $host in -*-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-cegcc*) +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) AC_CHECK_TOOL(AS, as, false) AC_CHECK_TOOL(DLLTOOL, dlltool, false) AC_CHECK_TOOL(OBJDUMP, objdump, false) diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4 index 5b5c80a..902508b 100644 --- a/m4/ltsugar.m4 +++ b/m4/ltsugar.m4 @@ -1,6 +1,6 @@ # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- # -# Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021-2024 Free Software +# Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021-2022 Free Software # Foundation, Inc. # Written by Gary V. 
Vaughan, 2004 # diff --git a/m4/ltversion.m4 b/m4/ltversion.m4 index 82887f7..b155d0a 100644 --- a/m4/ltversion.m4 +++ b/m4/ltversion.m4 @@ -1,6 +1,6 @@ # ltversion.m4 -- version numbers -*- Autoconf -*- # -# Copyright (C) 2004, 2011-2019, 2021-2024 Free Software Foundation, +# Copyright (C) 2004, 2011-2019, 2021-2022 Free Software Foundation, # Inc. # Written by Scott James Remnant, 2004 # @@ -10,15 +10,15 @@ # @configure_input@ -# serial 4337 ltversion.m4 +# serial 4245 ltversion.m4 # This file is part of GNU Libtool -m4_define([LT_PACKAGE_VERSION], [2.5.0.1-38c1-dirty]) -m4_define([LT_PACKAGE_REVISION], [2.5.0.1]) +m4_define([LT_PACKAGE_VERSION], [2.4.7]) +m4_define([LT_PACKAGE_REVISION], [2.4.7]) AC_DEFUN([LTVERSION_VERSION], -[macro_version='2.5.0.1-38c1-dirty' -macro_revision='2.5.0.1' +[macro_version='2.4.7' +macro_revision='2.4.7' _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) _LT_DECL(, macro_revision, 0) ]) diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4 index 22b5346..0f7a875 100644 --- a/m4/lt~obsolete.m4 +++ b/m4/lt~obsolete.m4 @@ -1,6 +1,6 @@ # lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- # -# Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021-2024 Free +# Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021-2022 Free # Software Foundation, Inc. # Written by Scott James Remnant, 2004. # diff --git a/m4/pcre2_visibility.m4 b/m4/pcre2_visibility.m4 index c025d5f..03f4fba 100644 --- a/m4/pcre2_visibility.m4 +++ b/m4/pcre2_visibility.m4 @@ -4,63 +4,63 @@ dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. -dnl From Bruno Haible. +dnl Originally From Bruno Haible. 
dnl Tests whether the compiler supports the command-line option -dnl -fvisibility=hidden and the function and variable attributes -dnl __attribute__((__visibility__("hidden"))) and +dnl -fvisibility=hidden and the function attribute dnl __attribute__((__visibility__("default"))). -dnl Does *not* test for __visibility__("protected") - which has tricky -dnl semantics (see the 'vismain' test in glibc) and does not exist e.g. on -dnl MacOS X. -dnl Does *not* test for __visibility__("internal") - which has processor -dnl dependent semantics. -dnl Does *not* test for #pragma GCC visibility push(hidden) - which is -dnl "really only recommended for legacy code". -dnl Set the variable CFLAG_VISIBILITY. +dnl +dnl Set the variable VISIBILITY_CFLAGS. dnl Defines and sets the variable HAVE_VISIBILITY. +dnl Defines and sets the variable WORKING_WERROR. dnl Modified to fit with PCRE build environment by Cristian Rodríguez. -dnl Adjusted for PCRE2 by PH +dnl Adjusted for PCRE2 by PH. +dnl Refactored to work with non GCC (but compatible) compilers. AC_DEFUN([PCRE2_VISIBILITY], [ AC_REQUIRE([AC_PROG_CC]) VISIBILITY_CFLAGS= - VISIBILITY_CXXFLAGS= HAVE_VISIBILITY=0 - if test -n "$GCC"; then - dnl First, check whether -Werror can be added to the command line, or - dnl whether it leads to an error because of some other option that the - dnl user has put into $CC $CFLAGS $CPPFLAGS. - AC_MSG_CHECKING([whether the -Werror option is usable]) - AC_CACHE_VAL([pcre2_cv_cc_vis_werror], [ - pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -Werror" - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[]], [[]])], - [pcre2_cv_cc_vis_werror=yes], - [pcre2_cv_cc_vis_werror=no]) - CFLAGS="$pcre2_save_CFLAGS"]) - AC_MSG_RESULT([$pcre2_cv_cc_vis_werror]) - dnl Now check whether visibility declarations are supported. 
- AC_MSG_CHECKING([for simple visibility declarations]) + dnl First, check whether -Werror can be added to the command line, or + dnl whether it leads to an error because of some other option that the + dnl user has put into $CC $CFLAGS $CPPFLAGS. + AC_MSG_CHECKING([whether the -Werror option is usable]) + AC_CACHE_VAL([pcre2_cv_cc_vis_werror], [ + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + pcre2_cv_cc_vis_werror=no + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], [[]])], + [ + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], [[ #warning e ]])], + [], [pcre2_cv_cc_vis_werror=yes] + ) + ], []) + CFLAGS="$pcre2_save_CFLAGS"]) + AC_MSG_RESULT([$pcre2_cv_cc_vis_werror]) + if test -n "$pcre2_cv_cc_vis_werror" && test $pcre2_cv_cc_vis_werror = yes + then + WORKING_WERROR=1 + else + WORKING_WERROR=0 + fi + if test $pcre2_cv_cc_vis_werror = yes; then + dnl Now check whether GCC compatible visibility declarations are supported. + AC_MSG_CHECKING([for GCC compatible visibility declarations]) AC_CACHE_VAL([pcre2_cv_cc_visibility], [ pcre2_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -fvisibility=hidden" + CFLAGS="$CFLAGS -Werror -fvisibility=hidden" dnl We use the option -Werror and a function dummyfunc, because on some dnl platforms (Cygwin 1.7) the use of -fvisibility triggers a warning dnl "visibility attribute not supported in this configuration; ignored" dnl at the first function definition in every compilation unit, and we dnl don't want to use the option in this case. 
- if test $pcre2_cv_cc_vis_werror = yes; then - CFLAGS="$CFLAGS -Werror" - fi AC_COMPILE_IFELSE( [AC_LANG_PROGRAM( - [[extern __attribute__((__visibility__("hidden"))) int hiddenvar; - extern __attribute__((__visibility__("default"))) int exportedvar; - extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); + [[extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); extern __attribute__((__visibility__("default"))) int exportedfunc (void); void dummyfunc (void) {} ]], @@ -68,21 +68,18 @@ AC_DEFUN([PCRE2_VISIBILITY], [pcre2_cv_cc_visibility=yes], [pcre2_cv_cc_visibility=no]) CFLAGS="$pcre2_save_CFLAGS"]) - AC_MSG_RESULT([$pcre2_cv_cc_visibility]) - if test $pcre2_cv_cc_visibility = yes; then - VISIBILITY_CFLAGS="-fvisibility=hidden" - VISIBILITY_CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden" - HAVE_VISIBILITY=1 - AC_DEFINE(PCRE2_EXPORT, [__attribute__ ((visibility ("default")))], [to make a symbol visible]) - else - AC_DEFINE(PCRE2_EXPORT, [], [to make a symbol visible]) - fi + AC_MSG_RESULT([$pcre2_cv_cc_visibility]) + fi + if test -n "$pcre2_cv_cc_visibility" && test $pcre2_cv_cc_visibility = yes + then + VISIBILITY_CFLAGS="-fvisibility=hidden" + HAVE_VISIBILITY=1 + AC_DEFINE(PCRE2_EXPORT, [__attribute__ ((visibility ("default")))], [to make a symbol visible]) else AC_DEFINE(PCRE2_EXPORT, [], [to make a symbol visible]) fi AC_SUBST([VISIBILITY_CFLAGS]) - AC_SUBST([VISIBILITY_CXXFLAGS]) AC_SUBST([HAVE_VISIBILITY]) AC_DEFINE_UNQUOTED([HAVE_VISIBILITY], [$HAVE_VISIBILITY], - [Define to 1 if the compiler supports simple visibility declarations.]) + [Define to 1 if the compiler supports GCC compatible visibility declarations.]) ]) diff --git a/perltest.sh b/perltest.sh index 4cd0f8a..37d4511 100755 --- a/perltest.sh +++ b/perltest.sh @@ -1,31 +1,53 @@ #! /bin/sh -# Script for testing regular expressions with perl to check that PCRE2 handles -# them the same. 
For testing with different versions of Perl, if the first -# argument is -perl then the second is taken as the Perl command to use, and -# both are then removed. If the next argument is "-w", Perl is called with -# "-w", which turns on its warning mode. +# This is a script for testing regular expressions with Perl to check that +# it handles them the same way as PCRE2. For testing with different versions of +# Perl, if the first argument is -perl, the second is taken as the Perl command +# to use, and both are then removed. If the next argument is "-w", Perl is +# called with "-w", which turns on its warning mode. # # The Perl code has to have "use utf8" and "require Encode" at the start when -# running UTF-8 tests, but *not* for non-utf8 tests. (The "require" would +# running UTF-8 tests, but *not* for non-utf8 tests. The "require" would # actually be OK for non-utf8-tests, but is not always installed, so this way -# the script will always run for these tests.) +# the script will always run for these tests. # # The desired effect is achieved by making this a shell script that passes the -# Perl script to Perl through a pipe. If the next argument is "-utf8", a -# suitable prefix is set up. +# script to Perl through a pipe. See comments below about the data for the +# Perl script. If the next argument of this script is "-utf8", a suitable +# prefix for the Perl script is set up. # -# The remaining arguments, if any, are passed to Perl. They are an input file -# and an output file. If there is one argument, the output is written to -# STDOUT. If Perl receives no arguments, it opens /dev/tty as input, and writes -# output to STDOUT. (I haven't found a way of getting it to use STDIN, because -# of the contorted piping input.) +# A similar process is used to indicate the desire to set specific locale +# tables per pattern in a similar way to pcre2test through a locale modifier, +# by using the -locale argument.
This can be optionally combined with the +# previous arguments; for example, to process a UTF-8 test file in Turkish, +# add the locale=tr_TR.utf8 modifier to the pattern and -locale to perltest, +# or invoke something like (the specific names of the locale might vary): +# +# ./perltest.sh -utf8 -locale=tr_TR.utf8 some-file +# +# If the -locale argument has no setting, a suitable default locale is used +# when possible and reported at startup; it can always be overridden using the +# locale modifier for each pattern. +# +# The remaining arguments of this script, if any, are passed to Perl. They are +# an input file and an output file. If there is one argument, the output is +# written to STDOUT. If Perl receives no arguments, it opens /dev/tty as input, +# and writes output to STDOUT. (I haven't found a way of getting it to use +# STDIN, because of the contorted piping input.) -perl=perl -perlarg='' -prefix='' -if [ $# -gt 1 -a "$1" = "-perl" ] ; then +# Handle the shell script arguments.
+ +perl=perl +perlarg="" +prefix="" +spc="" + +if [ $# -gt 0 -a "$1" = "-perl" ] ; then + if [ $# -lt 2 ] ; then + echo "perltest.sh: Missing perl command after -perl" + exit 1 + fi shift perl=$1 shift @@ -33,14 +55,44 @@ fi if [ $# -gt 0 -a "$1" = "-w" ] ; then perlarg="-w" + spc=" " shift fi if [ $# -gt 0 -a "$1" = "-utf8" ] ; then - prefix="use utf8; require Encode;" + default_locale="C.utf8" + prefix="\ + use utf8;\ + require Encode;" + perlarg="$perlarg$spc-CSD" shift fi +if [ $# -gt 0 ] ; then + case "$1" in + -locale=*) + default_locale=${1#-locale=} + ;; + -locale) + default_locale=${default_locale:-C} + ;; + *) + skip=1 + esac + if [ -z "$skip" ] ; then + prefix="\ + use POSIX qw(locale_h);\ + use locale qw(:ctype);\ + \ + \$default_locale = setlocale(LC_CTYPE, \"$default_locale\");\ + if (!defined(\$default_locale))\ + { die \"perltest: Failed to set locale \\\"$default_locale\\\"\\\n\"; }\ + print \"Locale: \$default_locale\\\n\";\ + $prefix" + shift + fi +fi + # The Perl script that follows has a similar specification to pcre2test, and so # can be given identical input, except that input patterns can be followed only @@ -50,7 +102,9 @@ fi # aftertext interpreted as "print $' afterwards" # afteralltext ignored # dupnames ignored (Perl always allows) +# hex preprocess pattern with embedded octets # jitstack ignored +# locale use a specific locale tables # mark show mark information # no_auto_possess ignored # no_start_optimize insert (??{""}) at pattern start (disables optimizing) @@ -86,10 +140,11 @@ fi (echo "$prefix" ; cat <<'PERLEND' -# The alpha assertions currently give warnings even when -w is not specified. +# Avoid warnings for some of the experimental features that are being used. no warnings "experimental::alpha_assertions"; no warnings "experimental::script_run"; +no warnings "experimental::vlb"; # Function for turning a string into a string of printing chars. 
@@ -109,7 +164,7 @@ else { foreach $c (split(//, $_[0])) { - if (ord $c >= 32 && ord $c < 127) { $t .= $c; } + if ($c =~ /^[[:print:]]$/) { $t .= $c; } else { $t .= sprintf("\\x%02x", ord $c); } } } @@ -143,7 +198,7 @@ if (@ARGV > 1) } else { $outfile = "STDOUT"; } -printf($outfile "Perl $^V\n\n"); +printf($outfile "Perl $^V\n"); $extra_modifiers = ""; $default_show_mark = 0; @@ -153,6 +208,12 @@ $default_show_mark = 0; NEXT_RE: for (;;) { + if (defined $locale && defined $default_locale) + { + setlocale(LC_CTYPE, $default_locale); + undef $locale; + } + printf " re> " if $interact; last if ! ($_ = <$infile>); printf $outfile "$_" if ! $interact; @@ -208,9 +269,9 @@ for (;;) # Split the pattern from the modifiers and adjust them as necessary. - $pattern =~ /^\s*((.).*\2)(.*)$/s; - $pat = $1; - $del = $2; + $pattern =~ /^\s*(.)(.*)\1(.*)$/s; + $del = $1; + $pat = $2; $mod = "$3,$extra_modifiers"; $mod =~ s/^,\s*//; @@ -226,10 +287,6 @@ for (;;) $mod =~ s/allaftertext,?//; - # Detect utf - - $utf8 = $mod =~ s/utf,?//; - # Remove "dupnames". $mod =~ s/dupnames,?//; @@ -238,6 +295,19 @@ for (;;) $mod =~ s/jitstack=\d+,?//; + # The "locale" modifier indicates which locale to use + if ($mod =~ /locale=([^,]+),?/) + { + die "perltest: missing -locale cmdline flag" unless defined &setlocale; + $locale = setlocale(LC_CTYPE, $1); + if (!defined $locale) + { + print "** Failed to set locale '$1'\n"; + next NEXT_RE; + } + } + $mod =~ s/locale=[^,]*,?//; # Remove it; "locale=" Ignored + # The "mark" modifier requests checking of MARK data */ $show_mark = $default_show_mark | ($mod =~ s/mark,?//); @@ -246,22 +316,55 @@ for (;;) $mod =~ s/ucp,?/u/; + # Detect utf + + $utf8 = $mod =~ s/utf,?//; + # Remove "no_auto_possess". 
$mod =~ s/no_auto_possess,?//; + # The "hex" modifier instructs us to preprocess a pattern with embedded + # octets formatted as two digit hexadecimals + + if ($mod =~ s/hex,?//) + { + my $t = ""; + + # find either 2 digit hex octets, optionally surrounded by spaces, to + # add as code points or quoted strings that will be copied verbatim + + while ($pat =~ /\s*(?:(\p{ahex}{2})|(['"])([^\2]+?)\2)\s*/g) + { + if (defined $1) + { + no utf8; + $t .= chr(hex($1)); + use if $utf8, "utf8"; + } + else + { + $t .= $3; + } + } + no utf8; + utf8::decode($t) if $utf8; + use if $utf8, "utf8"; + $pat = $t; + } + # Use no_start_optimize (disable PCRE2 start-up optimization) to disable Perl # optimization by inserting (??{""}) at the start of the pattern. We may # also encounter -no_start_optimize from a #pattern setting. $mod =~ s/-no_start_optimize,?//; - if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del(??{""})/; } + if ($mod =~ s/no_start_optimize,?//) { $pat = '(??{""})' . $pat; } # Add back retained modifiers and check that the pattern is valid. $mod =~ s/,//g; - $pattern = "$pat$mod"; + $pattern = "$del$pat$del$mod"; eval "\$_ =~ ${pattern}"; if ($@) @@ -313,7 +416,13 @@ for (;;) } else { - $x = eval "\"$_\""; # To get escapes processed + s/(? header file. */ +/* #undef HAVE_ASSERT_H */ + /* Define this if your compiler supports __attribute__((uninitialized)) */ /* #undef HAVE_ATTRIBUTE_UNINITIALIZED */ -/* Define to 1 if you have the 'bcopy' function. */ +/* Define to 1 if you have the `bcopy' function. */ /* #undef HAVE_BCOPY */ +/* Define this if your compiler provides __assume() */ +/* #undef HAVE_BUILTIN_ASSUME */ + /* Define this if your compiler provides __builtin_mul_overflow() */ /* #undef HAVE_BUILTIN_MUL_OVERFLOW */ +/* Define this if your compiler provides __builtin_unreachable() */ +/* #undef HAVE_BUILTIN_UNREACHABLE */ + /* Define to 1 if you have the header file. 
*/ /* #undef HAVE_BZLIB_H */ @@ -82,16 +91,16 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_LIMITS_H */ -/* Define to 1 if you have the 'memfd_create' function. */ +/* Define to 1 if you have the `memfd_create' function. */ /* #undef HAVE_MEMFD_CREATE */ -/* Define to 1 if you have the 'memmove' function. */ +/* Define to 1 if you have the `memmove' function. */ /* #undef HAVE_MEMMOVE */ /* Define to 1 if you have the header file. */ /* #undef HAVE_MINIX_CONFIG_H */ -/* Define to 1 if you have the 'mkostemp' function. */ +/* Define to 1 if you have the `mkostemp' function. */ /* #undef HAVE_MKOSTEMP */ /* Define if you have POSIX threads libraries and header files. */ @@ -112,7 +121,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the `realpath' function. */ /* #undef HAVE_REALPATH */ -/* Define to 1 if you have the 'secure_getenv' function. */ +/* Define to 1 if you have the `secure_getenv' function. */ /* #undef HAVE_SECURE_GETENV */ /* Define to 1 if you have the header file. */ @@ -124,7 +133,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_STDLIB_H */ -/* Define to 1 if you have the 'strerror' function. */ +/* Define to 1 if you have the `strerror' function. */ /* #undef HAVE_STRERROR */ /* Define to 1 if you have the header file. */ @@ -145,7 +154,8 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_UNISTD_H */ -/* Define to 1 if the compiler supports simple visibility declarations. */ +/* Define to 1 if the compiler supports GCC compatible visibility + declarations. */ /* #undef HAVE_VISIBILITY */ /* Define to 1 if you have the header file. 
*/ @@ -245,7 +255,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.44" +#define PACKAGE_STRING "PCRE2 10.45-RC1" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -254,7 +264,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.44" +#define PACKAGE_VERSION "10.45-RC1" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -311,7 +321,7 @@ sure both macros are undefined; an emulation function will then be used. */ unless SUPPORT_JIT is also defined. */ /* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */ -/* Define to 1 if all of the C89 standard headers exist (not just the ones +/* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ /* #undef STDC_HEADERS */ @@ -366,7 +376,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value for valgrind support to find invalid memory reads. */ /* #undef SUPPORT_VALGRIND */ -/* Enable extensions on AIX, Interix, z/OS. */ +/* Enable extensions on AIX 3, Interix. */ #ifndef _ALL_SOURCE # define _ALL_SOURCE 1 #endif @@ -427,15 +437,11 @@ sure both macros are undefined; an emulation function will then be used. */ #ifndef __STDC_WANT_IEC_60559_DFP_EXT__ # define __STDC_WANT_IEC_60559_DFP_EXT__ 1 #endif -/* Enable extensions specified by C23 Annex F. */ -#ifndef __STDC_WANT_IEC_60559_EXT__ -# define __STDC_WANT_IEC_60559_EXT__ 1 -#endif /* Enable extensions specified by ISO/IEC TS 18661-4:2015. 
*/ #ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ # define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1 #endif -/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. */ +/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */ #ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ # define __STDC_WANT_IEC_60559_TYPES_EXT__ 1 #endif @@ -458,26 +464,20 @@ sure both macros are undefined; an emulation function will then be used. */ #endif /* Version number of package */ -#define VERSION "10.44" +#define VERSION "10.45-RC1" /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ -/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* Define for large files, on AIX-style hosts. */ /* #undef _LARGE_FILES */ -/* Number of bits in time_t, on hosts where this is settable. */ -/* #undef _TIME_BITS */ - -/* Define to 1 on platforms where this makes time_t a 64-bit type. */ -/* #undef __MINGW_USE_VC2005_COMPAT */ - -/* Define to empty if 'const' does not conform to ANSI C. */ +/* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ /* Define to the type of a signed integer type of width exactly 64 bits if such a type exists and the standard includes do not define it. */ /* #undef int64_t */ -/* Define as 'unsigned int' if doesn't define. */ +/* Define to `unsigned int' if does not define. */ /* #undef size_t */ diff --git a/src/config.h.in b/src/config.h.in index 8249182..be29681 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -52,15 +52,24 @@ sure both macros are undefined; an emulation function will then be used. */ LF does in an ASCII/Unicode environment. */ #undef EBCDIC_NL25 +/* Define to 1 if you have the header file. */ +#undef HAVE_ASSERT_H + /* Define this if your compiler supports __attribute__((uninitialized)) */ #undef HAVE_ATTRIBUTE_UNINITIALIZED -/* Define to 1 if you have the 'bcopy' function. */ +/* Define to 1 if you have the `bcopy' function. 
*/ #undef HAVE_BCOPY +/* Define this if your compiler provides __assume() */ +#undef HAVE_BUILTIN_ASSUME + /* Define this if your compiler provides __builtin_mul_overflow() */ #undef HAVE_BUILTIN_MUL_OVERFLOW +/* Define this if your compiler provides __builtin_unreachable() */ +#undef HAVE_BUILTIN_UNREACHABLE + /* Define to 1 if you have the header file. */ #undef HAVE_BZLIB_H @@ -82,16 +91,16 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #undef HAVE_LIMITS_H -/* Define to 1 if you have the 'memfd_create' function. */ +/* Define to 1 if you have the `memfd_create' function. */ #undef HAVE_MEMFD_CREATE -/* Define to 1 if you have the 'memmove' function. */ +/* Define to 1 if you have the `memmove' function. */ #undef HAVE_MEMMOVE /* Define to 1 if you have the header file. */ #undef HAVE_MINIX_CONFIG_H -/* Define to 1 if you have the 'mkostemp' function. */ +/* Define to 1 if you have the `mkostemp' function. */ #undef HAVE_MKOSTEMP /* Define if you have POSIX threads libraries and header files. */ @@ -112,7 +121,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the `realpath' function. */ #undef HAVE_REALPATH -/* Define to 1 if you have the 'secure_getenv' function. */ +/* Define to 1 if you have the `secure_getenv' function. */ #undef HAVE_SECURE_GETENV /* Define to 1 if you have the header file. */ @@ -124,7 +133,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. */ #undef HAVE_STDLIB_H -/* Define to 1 if you have the 'strerror' function. */ +/* Define to 1 if you have the `strerror' function. */ #undef HAVE_STRERROR /* Define to 1 if you have the header file. */ @@ -145,7 +154,8 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to 1 if you have the header file. 
*/ #undef HAVE_UNISTD_H -/* Define to 1 if the compiler supports simple visibility declarations. */ +/* Define to 1 if the compiler supports GCC compatible visibility + declarations. */ #undef HAVE_VISIBILITY /* Define to 1 if you have the header file. */ @@ -287,7 +297,7 @@ sure both macros are undefined; an emulation function will then be used. */ unless SUPPORT_JIT is also defined. */ #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR -/* Define to 1 if all of the C89 standard headers exist (not just the ones +/* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ #undef STDC_HEADERS @@ -342,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define to any value for valgrind support to find invalid memory reads. */ #undef SUPPORT_VALGRIND -/* Enable extensions on AIX, Interix, z/OS. */ +/* Enable extensions on AIX 3, Interix. */ #ifndef _ALL_SOURCE # undef _ALL_SOURCE #endif @@ -403,15 +413,11 @@ sure both macros are undefined; an emulation function will then be used. */ #ifndef __STDC_WANT_IEC_60559_DFP_EXT__ # undef __STDC_WANT_IEC_60559_DFP_EXT__ #endif -/* Enable extensions specified by C23 Annex F. */ -#ifndef __STDC_WANT_IEC_60559_EXT__ -# undef __STDC_WANT_IEC_60559_EXT__ -#endif /* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ #ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ # undef __STDC_WANT_IEC_60559_FUNCS_EXT__ #endif -/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. */ +/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */ #ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ # undef __STDC_WANT_IEC_60559_TYPES_EXT__ #endif @@ -440,21 +446,15 @@ sure both macros are undefined; an emulation function will then be used. */ /* Number of bits in a file offset, on hosts where this is settable. 
*/ #undef _FILE_OFFSET_BITS -/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* Define for large files, on AIX-style hosts. */ #undef _LARGE_FILES -/* Number of bits in time_t, on hosts where this is settable. */ -#undef _TIME_BITS - -/* Define to 1 on platforms where this makes time_t a 64-bit type. */ -#undef __MINGW_USE_VC2005_COMPAT - -/* Define to empty if 'const' does not conform to ANSI C. */ +/* Define to empty if `const' does not conform to ANSI C. */ #undef const /* Define to the type of a signed integer type of width exactly 64 bits if such a type exists and the standard includes do not define it. */ #undef int64_t -/* Define as 'unsigned int' if doesn't define. */ +/* Define to `unsigned int' if does not define. */ #undef size_t diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic index a322d9f..94c7cbc 100644 --- a/src/pcre2.h.generic +++ b/src/pcre2.h.generic @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 44 -#define PCRE2_PRERELEASE -#define PCRE2_DATE 2024-06-07 +#define PCRE2_MINOR 45 +#define PCRE2_PRERELEASE -RC1 +#define PCRE2_DATE 2024-12-27 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -143,6 +143,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTENDED_MORE 0x01000000u /* C */ #define PCRE2_LITERAL 0x02000000u /* C */ #define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ +#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */ /* An additional compile options word is available in the compile context. 
*/ @@ -159,6 +160,10 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ #define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ #define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ +#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */ +#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */ +#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */ +#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */ /* These are for pcre2_jit_compile(). */ @@ -166,6 +171,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_JIT_PARTIAL_SOFT 0x00000002u #define PCRE2_JIT_PARTIAL_HARD 0x00000004u #define PCRE2_JIT_INVALID_UTF 0x00000100u +#define PCRE2_JIT_TEST_ALLOC 0x00000200u /* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and pcre2_substitute(). Some are allowed only for one of the functions, and in @@ -318,9 +324,25 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 #define PCRE2_ERROR_TOO_MANY_CAPTURES 197 -#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198 #define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 - +#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200 +#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201 +#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202 +#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203 +#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204 +#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205 +#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206 +#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207 +#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208 +#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209 +#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210 +#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211 +#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213 +#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214 +#define 
PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216 /* "Expected" matching error codes: no match and partial match. */ @@ -407,6 +429,9 @@ released, the numbers must not be changed. */ #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) #define PCRE2_ERROR_DFA_UINVALID_UTF (-66) #define PCRE2_ERROR_INVALIDOFFSET (-67) +#define PCRE2_ERROR_JIT_UNSUPPORTED (-68) +#define PCRE2_ERROR_REPLACECASE (-69) +#define PCRE2_ERROR_TOOLARGEREPLACE (-70) /* Request types for pcre2_pattern_info() */ @@ -460,6 +485,30 @@ released, the numbers must not be changed. */ #define PCRE2_CONFIG_COMPILED_WIDTHS 14 #define PCRE2_CONFIG_TABLES_LENGTH 15 +/* Optimization directives for pcre2_set_optimize(). +For binary compatibility, only add to this list; do not renumber. */ + +#define PCRE2_OPTIMIZATION_NONE 0 +#define PCRE2_OPTIMIZATION_FULL 1 + +#define PCRE2_AUTO_POSSESS 64 +#define PCRE2_AUTO_POSSESS_OFF 65 +#define PCRE2_DOTSTAR_ANCHOR 66 +#define PCRE2_DOTSTAR_ANCHOR_OFF 67 +#define PCRE2_START_OPTIMIZE 68 +#define PCRE2_START_OPTIMIZE_OFF 69 + +/* Types used in pcre2_set_substitute_case_callout(). + +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase, +and the rest to lowercase. */ + +#define PCRE2_SUBSTITUTE_CASE_LOWER 1 +#define PCRE2_SUBSTITUTE_CASE_UPPER 2 +#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3 /* Types for code units in patterns and subject strings. 
*/ @@ -613,7 +662,9 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ - int (*)(uint32_t, void *), void *); + int (*)(uint32_t, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_optimize(pcre2_compile_context *, uint32_t); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ @@ -628,6 +679,11 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_substitute_callout(pcre2_match_context *, \ int (*)(pcre2_substitute_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_case_callout(pcre2_match_context *, \ + PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \ + void *), \ + void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -740,6 +796,7 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); + /* Functions for serializing / deserializing compiled patterns. */ #define PCRE2_SERIALIZE_FUNCTIONS \ @@ -907,7 +964,9 @@ pcre2_compile are called by application code. 
*/ #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_) #define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) +#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) #define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) #define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) diff --git a/src/pcre2.h.in b/src/pcre2.h.in index b43534b..ca3f0b4 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -143,6 +143,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTENDED_MORE 0x01000000u /* C */ #define PCRE2_LITERAL 0x02000000u /* C */ #define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ +#define PCRE2_ALT_EXTENDED_CLASS 0x08000000u /* C */ /* An additional compile options word is available in the compile context. */ @@ -159,6 +160,10 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ #define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ #define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ +#define PCRE2_EXTRA_PYTHON_OCTAL 0x00002000u /* C */ +#define PCRE2_EXTRA_NO_BS0 0x00004000u /* C */ +#define PCRE2_EXTRA_NEVER_CALLOUT 0x00008000u /* C */ +#define PCRE2_EXTRA_TURKISH_CASING 0x00010000u /* C */ /* These are for pcre2_jit_compile(). */ @@ -166,6 +171,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_JIT_PARTIAL_SOFT 0x00000002u #define PCRE2_JIT_PARTIAL_HARD 0x00000004u #define PCRE2_JIT_INVALID_UTF 0x00000100u +#define PCRE2_JIT_TEST_ALLOC 0x00000200u /* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and pcre2_substitute(). 
Some are allowed only for one of the functions, and in @@ -318,9 +324,25 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 #define PCRE2_ERROR_TOO_MANY_CAPTURES 197 -#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198 #define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 - +#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200 +#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201 +#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202 +#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED 203 +#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE 204 +#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF 205 +#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE 206 +#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP 207 +#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR 208 +#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR 209 +#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND 210 +#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS 211 +#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET 212 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR 213 +#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR 214 +#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE 215 +#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216 /* "Expected" matching error codes: no match and partial match. */ @@ -407,6 +429,9 @@ released, the numbers must not be changed. */ #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) #define PCRE2_ERROR_DFA_UINVALID_UTF (-66) #define PCRE2_ERROR_INVALIDOFFSET (-67) +#define PCRE2_ERROR_JIT_UNSUPPORTED (-68) +#define PCRE2_ERROR_REPLACECASE (-69) +#define PCRE2_ERROR_TOOLARGEREPLACE (-70) /* Request types for pcre2_pattern_info() */ @@ -460,6 +485,30 @@ released, the numbers must not be changed. */ #define PCRE2_CONFIG_COMPILED_WIDTHS 14 #define PCRE2_CONFIG_TABLES_LENGTH 15 +/* Optimization directives for pcre2_set_optimize(). +For binary compatibility, only add to this list; do not renumber. 
*/ + +#define PCRE2_OPTIMIZATION_NONE 0 +#define PCRE2_OPTIMIZATION_FULL 1 + +#define PCRE2_AUTO_POSSESS 64 +#define PCRE2_AUTO_POSSESS_OFF 65 +#define PCRE2_DOTSTAR_ANCHOR 66 +#define PCRE2_DOTSTAR_ANCHOR_OFF 67 +#define PCRE2_START_OPTIMIZE 68 +#define PCRE2_START_OPTIMIZE_OFF 69 + +/* Types used in pcre2_set_substitute_case_callout(). + +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase, +and the rest to lowercase. */ + +#define PCRE2_SUBSTITUTE_CASE_LOWER 1 +#define PCRE2_SUBSTITUTE_CASE_UPPER 2 +#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST 3 /* Types for code units in patterns and subject strings. */ @@ -613,7 +662,9 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ - int (*)(uint32_t, void *), void *); + int (*)(uint32_t, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_optimize(pcre2_compile_context *, uint32_t); #define PCRE2_MATCH_CONTEXT_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ @@ -628,6 +679,11 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_substitute_callout(pcre2_match_context *, \ int (*)(pcre2_substitute_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_case_callout(pcre2_match_context *, \ + PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \ + void *), \ + void *); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ @@ -740,6 +796,7 @@ PCRE2_EXP_DECL void 
PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); + /* Functions for serializing / deserializing compiled patterns. */ #define PCRE2_SERIALIZE_FUNCTIONS \ @@ -907,7 +964,9 @@ pcre2_compile are called by application code. */ #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_) #define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) +#define pcre2_set_substitute_case_callout PCRE2_SUFFIX(pcre2_set_substitute_case_callout_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) #define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) #define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index 210d13d..6d7f27b 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -49,6 +49,10 @@ repeats into possessive repeats where possible. 
*/ #include "pcre2_internal.h" +/* This macro represents the max size of list[] and that is used to keep +track of UCD info in several places, it should be kept in sync with the +value used by GenerateUcd.py */ +#define MAX_LIST 8 /************************************************* * Tables for auto-possessification * @@ -64,7 +68,7 @@ The Unicode property types (\P and \p) have to be present to fill out the table because of what their opcode values are, but the table values should always be zero because property types are handled separately in the code. The last four columns apply to items that cannot be repeated, so there is no need to have -rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is +rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1) @@ -123,21 +127,21 @@ opcode is used to select the column.
The values are as follows: */ static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = { -/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ - { 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ - { 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ - { 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ - { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ - { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ - { 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ - { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ - { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ - { 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ +/* LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ + { 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ + { 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ + { 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ + { 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ + { 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ + { 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ + { 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ + { 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ + { 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ + /* PT_ANY does not need a record. 
*/ }; /* This table is used to check whether auto-possessification is possible @@ -199,7 +203,7 @@ static BOOL check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, BOOL negated) { -BOOL ok; +BOOL ok, rc; const uint32_t *p; const ucd_record *prop = GET_UCD(c); @@ -240,12 +244,13 @@ switch(ptype) { HSPACE_CASES: VSPACE_CASES: - return negated; + rc = negated; + break; default: - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; + rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; } - break; /* Control never reaches here */ + return rc; case PT_WORD: return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || @@ -259,7 +264,8 @@ switch(ptype) if (c < *p) return !negated; if (c == *p++) return negated; } - break; /* Control never reaches here */ + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ + break; /* Haven't yet thought these through. */ @@ -328,6 +334,7 @@ get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc, PCRE2_UCHAR c = *code; PCRE2_UCHAR base; PCRE2_SPTR end; +PCRE2_SPTR class_end; uint32_t chr; #ifdef SUPPORT_UNICODE @@ -450,10 +457,12 @@ switch(c) code += 2; do { - if (clist_dest >= list + 8) + if (clist_dest >= list + MAX_LIST) { - /* Early return if there is not enough space. This should never - happen, since all clists are shorter than 5 character now. */ + /* Early return if there is not enough space. GenerateUcd.py + generated a list with more than 5 characters and something + must be done about that going forward. 
*/ + PCRE2_DEBUG_UNREACHABLE(); /* Remove if it ever triggers */ list[2] = code[0]; list[3] = code[1]; return code; @@ -473,11 +482,13 @@ switch(c) case OP_CLASS: #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: - if (c == OP_XCLASS) + case OP_ECLASS: + if (c == OP_XCLASS || c == OP_ECLASS) end = code + GET(code, 0) - 1; else #endif end = code + 32 / sizeof(PCRE2_UCHAR); + class_end = end; switch(*end) { @@ -505,6 +516,7 @@ switch(c) break; } list[2] = (uint32_t)(end - code); + list[3] = (uint32_t)(end - class_end); return end; } @@ -537,7 +549,7 @@ compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb, const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit) { PCRE2_UCHAR c; -uint32_t list[8]; +uint32_t list[MAX_LIST]; const uint32_t *chr_ptr; const uint32_t *ochr_ptr; const uint32_t *list_ptr; @@ -581,7 +593,7 @@ for(;;) continue; } - /* At the end of a branch, skip to the end of the group. */ + /* At the end of a branch, skip to the end of the group and process it. */ if (c == OP_ALT) { @@ -638,19 +650,29 @@ for(;;) return FALSE; break; - /* Atomic sub-patterns and assertions can always auto-possessify their - last iterator except for variable length lookbehinds. However, if the - group was entered as a result of checking a previous iterator, this is - not possible. */ + /* Atomic sub-patterns and forward assertions can always auto-possessify + their last iterator. However, if the group was entered as a result of + checking a previous iterator, this is not possible. */ case OP_ASSERT: case OP_ASSERT_NOT: case OP_ONCE: return !entered_a_group; + /* Fixed-length lookbehinds can be treated the same way, but variable + length lookbehinds must not auto-possessify their last iterator. Note + that in order to identify a variable length lookbehind we must check + through all branches, because some may be of fixed length. */ + case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: - return (bracode[1+LINK_SIZE] == OP_VREVERSE)? 
FALSE : !entered_a_group; + do + { + if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE; /* Variable */ + bracode += GET(bracode, 1); + } + while (*bracode == OP_ALT); + return !entered_a_group; /* Not variable length */ /* Non-atomic assertions - don't possessify last iterator. This needs more thought. */ @@ -748,12 +770,12 @@ for(;;) if (base_list[0] == OP_CLASS) #endif { - set1 = (uint8_t *)(base_end - base_list[2]); + set1 = (const uint8_t *)(base_end - base_list[2]); list_ptr = list; } else { - set1 = (uint8_t *)(code - list[2]); + set1 = (const uint8_t *)(code - list[2]); list_ptr = base_list; } @@ -762,13 +784,14 @@ for(;;) { case OP_CLASS: case OP_NCLASS: - set2 = (uint8_t *) + set2 = (const uint8_t *) ((list_ptr == list ? code : base_end) - list_ptr[2]); break; #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: - xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE; + xclass_flags = (list_ptr == list ? code : base_end) - + list_ptr[2] + LINK_SIZE; if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE; if ((*xclass_flags & XCL_MAP) == 0) { @@ -777,7 +800,7 @@ for(;;) /* Might be an empty repeat. */ continue; } - set2 = (uint8_t *)(xclass_flags + 1); + set2 = (const uint8_t *)(xclass_flags + 1); break; #endif @@ -785,21 +808,21 @@ for(;;) invert_bits = TRUE; /* Fall through */ case OP_DIGIT: - set2 = (uint8_t *)(cb->cbits + cbit_digit); + set2 = (const uint8_t *)(cb->cbits + cbit_digit); break; case OP_NOT_WHITESPACE: invert_bits = TRUE; /* Fall through */ case OP_WHITESPACE: - set2 = (uint8_t *)(cb->cbits + cbit_space); + set2 = (const uint8_t *)(cb->cbits + cbit_space); break; case OP_NOT_WORDCHAR: invert_bits = TRUE; /* Fall through */ case OP_WORDCHAR: - set2 = (uint8_t *)(cb->cbits + cbit_word); + set2 = (const uint8_t *)(cb->cbits + cbit_word); break; default: @@ -1084,7 +1107,7 @@ for(;;) case OP_CLASS: if (chr > 255) break; - class_bitset = (uint8_t *) + class_bitset = (const uint8_t *) ((list_ptr == list ? 
code : base_end) - list_ptr[2]); if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE; break; @@ -1092,9 +1115,18 @@ for(;;) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) - - list_ptr[2] + LINK_SIZE, utf)) return FALSE; + list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf)) + return FALSE; break; -#endif + + case OP_ECLASS: + if (PRIV(eclass)(chr, + (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE, + (list_ptr == list ? code : base_end) - list_ptr[3], + (const uint8_t*)cb->start_code, utf)) + return FALSE; + break; +#endif /* SUPPORT_WIDE_CHARS */ default: return FALSE; @@ -1109,8 +1141,8 @@ for(;;) if (list[1] == 0) return TRUE; } -/* Control never reaches here. There used to be a fail-save return FALSE; here, -but some compilers complain about an unreachable statement. */ +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return FALSE; /* Avoid compiler warnings */ } @@ -1140,7 +1172,7 @@ PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb) PCRE2_UCHAR c; PCRE2_SPTR end; PCRE2_UCHAR *repeat_opcode; -uint32_t list[8]; +uint32_t list[MAX_LIST]; int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. 
*/ BOOL utf = (cb->external_options & PCRE2_UTF) != 0; BOOL ucp = (cb->external_options & PCRE2_UCP) != 0; @@ -1149,7 +1181,11 @@ for (;;) { c = *code; - if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */ + if (c >= OP_TABLE_LENGTH) + { + PCRE2_DEBUG_UNREACHABLE(); + return -1; /* Something gone wrong */ + } if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) { @@ -1198,10 +1234,14 @@ for (;;) } c = *code; } - else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS) + else if (c == OP_CLASS || c == OP_NCLASS +#ifdef SUPPORT_WIDE_CHARS + || c == OP_XCLASS || c == OP_ECLASS +#endif + ) { #ifdef SUPPORT_WIDE_CHARS - if (c == OP_XCLASS) + if (c == OP_XCLASS || c == OP_ECLASS) repeat_opcode = code + GET(code, 1); else #endif @@ -1211,7 +1251,7 @@ for (;;) if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) { /* The return from get_chr_property_list() will never be NULL when - *code (aka c) is one of the three class opcodes. However, gcc with + *code (aka c) is one of the four class opcodes. However, gcc with -fanalyzer notes that a NULL return is possible, and grumbles. Hence we put in a check. */ @@ -1279,6 +1319,7 @@ for (;;) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: code += GET(code, 1); break; #endif diff --git a/src/pcre2_chkdint.c b/src/pcre2_chkdint.c index d04f6f8..7083023 100644 --- a/src/pcre2_chkdint.c +++ b/src/pcre2_chkdint.c @@ -74,9 +74,7 @@ if (__builtin_mul_overflow(a, b, &m)) return TRUE; #else INT64_OR_DOUBLE m; -#ifdef PCRE2_DEBUG -if (a < 0 || b < 0) abort(); -#endif +PCRE2_ASSERT(a >= 0 && b >= 0); m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b; @@ -93,4 +91,4 @@ if (m > PCRE2_SIZE_MAX) return TRUE; return FALSE; } -/* End of pcre_chkdint.c */ +/* End of pcre2_chkdint.c */ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 8e6787a..0ffac89 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -47,7 +47,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#define PSSTART start_pattern /* Field containing processed string start */ #define PSEND end_pattern /* Field containing processed string end */ -#include "pcre2_internal.h" +#include "pcre2_compile.h" /* In rare error cases debugging might require calling pcre2_printint(). */ @@ -108,20 +108,8 @@ them will be able to (i.e. assume a 64-bit world). */ #define SIZEOFFSET 2 #endif -/* Macros for manipulating elements of the parsed pattern vector. */ - -#define META_CODE(x) (x & 0xffff0000u) -#define META_DATA(x) (x & 0x0000ffffu) -#define META_DIFF(x,y) ((x-y)>>16) - /* Function definitions to allow mutual recursion */ -#ifdef SUPPORT_UNICODE -static unsigned int - add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, uint32_t, - compile_block *, const uint32_t *, unsigned int); -#endif - static int compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, @@ -199,106 +187,6 @@ don't have to check them every time. */ #define OFLOW_MAX (INT_MAX - 20) -/* Code values for parsed patterns, which are stored in a vector of 32-bit -unsigned ints. Values less than META_END are literal data values. The coding -for identifying the item is in the top 16-bits, leaving 16 bits for the -additional data that some of them need. The META_CODE, META_DATA, and META_DIFF -macros are used to manipulate parsed pattern elements. - -NOTE: When these definitions are changed, the table of extra lengths for each -code (meta_extra_lengths, just below) must be updated to remain in step. 
*/ - -#define META_END 0x80000000u /* End of pattern */ - -#define META_ALT 0x80010000u /* alternation */ -#define META_ATOMIC 0x80020000u /* atomic group */ -#define META_BACKREF 0x80030000u /* Back ref */ -#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */ -#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */ -#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */ -#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */ -#define META_CAPTURE 0x80080000u /* Capturing parenthesis */ -#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */ -#define META_CLASS 0x800a0000u /* start non-empty class */ -#define META_CLASS_EMPTY 0x800b0000u /* empty class */ -#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */ -#define META_CLASS_END 0x800d0000u /* end of non-empty class */ -#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */ -#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */ -#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */ -#define META_COND_NAME 0x80110000u /* (?()... */ -#define META_COND_NUMBER 0x80120000u /* (?(digits)... */ -#define META_COND_RNAME 0x80130000u /* (?(R&name)... */ -#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */ -#define META_COND_VERSION 0x80150000u /* (?(VERSIONx.y)... */ -#define META_DOLLAR 0x80160000u /* $ metacharacter */ -#define META_DOT 0x80170000u /* . 
metacharacter */ -#define META_ESCAPE 0x80180000u /* \d and friends */ -#define META_KET 0x80190000u /* closing parenthesis */ -#define META_NOCAPTURE 0x801a0000u /* no capture parens */ -#define META_OPTIONS 0x801b0000u /* (?i) and friends */ -#define META_POSIX 0x801c0000u /* POSIX class item */ -#define META_POSIX_NEG 0x801d0000u /* negative POSIX class item */ -#define META_RANGE_ESCAPED 0x801e0000u /* range with at least one escape */ -#define META_RANGE_LITERAL 0x801f0000u /* range defined literally */ -#define META_RECURSE 0x80200000u /* Recursion */ -#define META_RECURSE_BYNAME 0x80210000u /* (?&name) */ -#define META_SCRIPT_RUN 0x80220000u /* (*script_run:...) */ - -/* These must be kept together to make it easy to check that an assertion -is present where expected in a conditional group. */ - -#define META_LOOKAHEAD 0x80230000u /* (?= */ -#define META_LOOKAHEADNOT 0x80240000u /* (?! */ -#define META_LOOKBEHIND 0x80250000u /* (?<= */ -#define META_LOOKBEHINDNOT 0x80260000u /* (? */ CHAR_GREATER_THAN_SIGN, /* ? 
*/ CHAR_QUESTION_MARK, + /* @ */ CHAR_COMMERCIAL_AT, /* A */ -ESC_A, + /* B */ -ESC_B, /* C */ -ESC_C, + /* D */ -ESC_D, /* E */ -ESC_E, + /* F */ 0, /* G */ -ESC_G, + /* H */ -ESC_H, /* I */ 0, + /* J */ 0, /* K */ -ESC_K, + /* L */ 0, /* M */ 0, + /* N */ -ESC_N, /* O */ 0, + /* P */ -ESC_P, /* Q */ -ESC_Q, + /* R */ -ESC_R, /* S */ -ESC_S, + /* T */ 0, /* U */ 0, + /* V */ -ESC_V, /* W */ -ESC_W, + /* X */ -ESC_X, /* Y */ 0, + /* Z */ -ESC_Z, /* [ */ CHAR_LEFT_SQUARE_BRACKET, + /* \ */ CHAR_BACKSLASH, /* ] */ CHAR_RIGHT_SQUARE_BRACKET, + /* ^ */ CHAR_CIRCUMFLEX_ACCENT, /* _ */ CHAR_UNDERSCORE, + /* ` */ CHAR_GRAVE_ACCENT, /* a */ CHAR_BEL, + /* b */ -ESC_b, /* c */ 0, + /* d */ -ESC_d, /* e */ CHAR_ESC, + /* f */ CHAR_FF, /* g */ 0, + /* h */ -ESC_h, /* i */ 0, + /* j */ 0, /* k */ -ESC_k, + /* l */ 0, /* m */ 0, + /* n */ CHAR_LF, /* o */ 0, + /* p */ -ESC_p, /* q */ 0, + /* r */ CHAR_CR, /* s */ -ESC_s, + /* t */ CHAR_HT, /* u */ 0, + /* v */ -ESC_v, /* w */ -ESC_w, + /* x */ 0, /* y */ 0, + /* z */ -ESC_z }; #else @@ -656,6 +542,8 @@ static const char alasnames[] = STRING_non_atomic_positive_lookbehind0 STRING_negative_lookahead0 STRING_negative_lookbehind0 + STRING_scs0 + STRING_scan_substring0 STRING_atomic0 STRING_sr0 STRING_asr0 @@ -675,6 +563,8 @@ static const alasitem alasmeta[] = { { 30, META_LOOKBEHIND_NA }, { 18, META_LOOKAHEADNOT }, { 19, META_LOOKBEHINDNOT }, + { 3, META_SCS }, + { 14, META_SCS }, { 6, META_ATOMIC }, { 2, META_SCRIPT_RUN }, /* sr = script run */ { 3, META_ATOMIC_SCRIPT_RUN }, /* asr = atomic script run */ @@ -694,8 +584,11 @@ static uint32_t chartypeoffset[] = { now all in a single string, to reduce the number of relocations when a shared library is dynamically loaded. The list of lengths is terminated by a zero length entry. The first three must be alpha, lower, upper, as this is assumed -for handling case independence. The indices for several classes are needed, so -identify them. */ +for handling case independence. 
+ +The indices for several classes are stored in pcre2_compile.h - these must +be kept in sync with posix_names, posix_name_lengths, posix_class_maps, +and posix_substitutes. */ static const char posix_names[] = STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 @@ -706,12 +599,6 @@ static const char posix_names[] = static const uint8_t posix_name_lengths[] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; -#define PC_DIGIT 7 -#define PC_GRAPH 8 -#define PC_PRINT 9 -#define PC_PUNCT 10 -#define PC_XDIGIT 13 - /* Table of class bit maps for each POSIX class. Each class is formed from a base map, with an optional addition or removal of another map. Then, for some classes, there is some additional tweaking: for [:blank:] the vertical space @@ -722,7 +609,7 @@ addition or a negative value for map subtraction (if there are two maps). The absolute value of the third field has these meanings: 0 => no tweaking, 1 => remove vertical space characters, 2 => remove underscore. */ -static const int posix_class_maps[] = { +const int PRIV(posix_class_maps)[] = { cbit_word, cbit_digit, -2, /* alpha */ cbit_lower, -1, 0, /* lower */ cbit_upper, -1, 0, /* upper */ @@ -760,7 +647,6 @@ static int posix_substitutes[] = { PT_WORD, 0, /* word */ /* Perl and POSIX space are the same */ PT_PXXDIGIT, 0 /* xdigit */ /* Perl has additional hex digits */ }; -#define POSIX_SUBSIZE (sizeof(posix_substitutes) / (2*sizeof(uint32_t))) #endif /* SUPPORT_UNICODE */ /* Masks for checking option settings. When PCRE2_LITERAL is set, only a subset @@ -778,10 +664,11 @@ are allowed. 
*/ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MATCH_UNSET_BACKREF| \ PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C|PCRE2_NEVER_UCP| \ PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE|PCRE2_NO_AUTO_POSSESS| \ - PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY) + PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_ALT_EXTENDED_CLASS) #define PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS \ - (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_CASELESS_RESTRICT) + (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD| \ + PCRE2_EXTRA_CASELESS_RESTRICT|PCRE2_EXTRA_TURKISH_CASING) #define PUBLIC_COMPILE_EXTRA_OPTIONS \ (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \ @@ -789,27 +676,8 @@ are allowed. */ PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \ PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \ PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX| \ - PCRE2_EXTRA_ASCII_DIGIT) - -/* Compile time error code numbers. They are given names so that they can more -easily be tracked. When a new number is added, the tables called eint1 and -eint2 in pcre2posix.c may need to be updated, and a new error text must be -added to compile_error_texts in pcre2_error.c. Also, the error codes in -pcre2.h.in must be updated - their values are exactly 100 greater than these -values. 
*/ - -enum { ERR0 = COMPILE_ERROR_BASE, - ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, - ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, - ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30, - ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, - ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, - ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, - ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, - ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, - ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, - ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100, - ERR101 }; + PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_PYTHON_OCTAL|PCRE2_EXTRA_NO_BS0| \ + PCRE2_EXTRA_NEVER_CALLOUT) /* This is a table of start-of-pattern options such as (*UTF) and settings such as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward @@ -817,16 +685,18 @@ compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is generic and always supported. 
*/ enum { PSO_OPT, /* Value is an option bit */ + PSO_XOPT, /* Value is an xoption bit */ PSO_FLG, /* Value is a flag bit */ PSO_NL, /* Value is a newline type */ PSO_BSR, /* Value is a \R type */ PSO_LIMH, /* Read integer value for heap limit */ PSO_LIMM, /* Read integer value for match limit */ - PSO_LIMD /* Read integer value for depth limit */ + PSO_LIMD, /* Read integer value for depth limit */ + PSO_OPTMZ /* Value is an optimization bit */ }; typedef struct pso { - const uint8_t *name; + const char *name; uint16_t length; uint16_t type; uint32_t value; @@ -835,27 +705,29 @@ typedef struct pso { /* NB: STRING_UTFn_RIGHTPAR contains the length as well */ static const pso pso_list[] = { - { (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF }, - { (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, - { (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, - { (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET }, - { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR, 17, PSO_FLG, PCRE2_NE_ATST_SET }, - { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS }, - { (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR }, - { (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT }, - { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE }, - { (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 }, - { (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, - { (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 }, - { (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 }, - { (uint8_t *)STRING_CR_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_CR }, - { (uint8_t *)STRING_LF_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_LF }, - { (uint8_t *)STRING_CRLF_RIGHTPAR, 5, PSO_NL, PCRE2_NEWLINE_CRLF }, - { (uint8_t *)STRING_ANY_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_ANY }, - { (uint8_t *)STRING_NUL_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_NUL }, - { (uint8_t *)STRING_ANYCRLF_RIGHTPAR, 8, PSO_NL, 
PCRE2_NEWLINE_ANYCRLF }, - { (uint8_t *)STRING_BSR_ANYCRLF_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_ANYCRLF }, - { (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE } + { STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF }, + { STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, + { STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, + { STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET }, + { STRING_NOTEMPTY_ATSTART_RIGHTPAR, 17, PSO_FLG, PCRE2_NE_ATST_SET }, + { STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPTMZ, PCRE2_OPTIM_AUTO_POSSESS }, + { STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPTMZ, PCRE2_OPTIM_DOTSTAR_ANCHOR }, + { STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT }, + { STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPTMZ, PCRE2_OPTIM_START_OPTIMIZE }, + { STRING_CASELESS_RESTRICT_RIGHTPAR, 18, PSO_XOPT, PCRE2_EXTRA_CASELESS_RESTRICT }, + { STRING_TURKISH_CASING_RIGHTPAR, 15, PSO_XOPT, PCRE2_EXTRA_TURKISH_CASING }, + { STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 }, + { STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, + { STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 }, + { STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 }, + { STRING_CR_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_CR }, + { STRING_LF_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_LF }, + { STRING_CRLF_RIGHTPAR, 5, PSO_NL, PCRE2_NEWLINE_CRLF }, + { STRING_ANY_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_ANY }, + { STRING_NUL_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_NUL }, + { STRING_ANYCRLF_RIGHTPAR, 8, PSO_NL, PCRE2_NEWLINE_ANYCRLF }, + { STRING_BSR_ANYCRLF_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_ANYCRLF }, + { STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE } }; /* This table is used when converting repeating opcodes into possessified @@ -910,12 +782,15 @@ static const uint8_t opcode_possessify[] = { OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */ 0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */ - 0, 0, 0, /* CLASS, NCLASS, XCLASS */ + 0, 0, 0, 0, /* CLASS, NCLASS, XCLASS, ECLASS */ 0, 0, /* REF, REFI */ 0, 0, /* DNREF, DNREFI */ - 0, 0 /* RECURSE, CALLOUT */ + 
0, 0, /* RECURSE, CALLOUT */ }; +/* Compile-time check that the table has the correct size. */ +STATIC_ASSERT(sizeof(opcode_possessify) == OP_CALLOUT+1, opcode_possessify); + #ifdef DEBUG_SHOW_PARSED /************************************************* @@ -977,7 +852,7 @@ for (;;) { uint32_t ptype = *pptr >> 16; uint32_t pvalue = *pptr++ & 0xffff; - fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? 'P':'p', + fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? CHAR_P:CHAR_p, ptype, pvalue); } else @@ -1152,6 +1027,24 @@ for (;;) fprintf(stderr, "%zd", offset); break; + case META_OFFSET: + fprintf(stderr, "META_OFFSET offset="); + GETOFFSET(offset, pptr); + fprintf(stderr, "%zd", offset); + break; + + case META_SCS: + fprintf(stderr, "META (*scan_substring:"); + break; + + case META_SCS_NAME: + fprintf(stderr, "META_SCS_NAME length=%d relative_offset=%d", *pptr++, (int)meta_arg); + break; + + case META_SCS_NUMBER: + fprintf(stderr, "META_SCS_NUMBER %d relative_offset=%d", *pptr++, (int)meta_arg); + break; + case META_MARK: fprintf(stderr, "META (*MARK:"); goto SHOWARG; @@ -1180,6 +1073,12 @@ for (;;) } fprintf(stderr, ") length=%u", length); break; + + case META_ECLASS_AND: fprintf(stderr, "META_ECLASS_AND"); break; + case META_ECLASS_OR: fprintf(stderr, "META_ECLASS_OR"); break; + case META_ECLASS_SUB: fprintf(stderr, "META_ECLASS_SUB"); break; + case META_ECLASS_XOR: fprintf(stderr, "META_ECLASS_XOR"); break; + case META_ECLASS_NOT: fprintf(stderr, "META_ECLASS_NOT"); break; } fprintf(stderr, "\n"); } @@ -1199,7 +1098,7 @@ associated JIT data. 
*/ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION pcre2_code_copy(const pcre2_code *code) { -PCRE2_SIZE* ref_count; +PCRE2_SIZE *ref_count; pcre2_code *newcode; if (code == NULL) return NULL; @@ -1311,7 +1210,10 @@ Arguments: ptrptr points to the character pointer variable ptrend points to the end of the input string allow_sign if < 0, sign not allowed; if >= 0, sign is relative to this - max_value the largest number allowed + max_value the largest number allowed; + you must not pass a value for max_value larger than + INT_MAX/10 - 1 because this function relies on max_value to + avoid integer overflow max_error the error to give for an over-large number intptr where to put the result errcodeptr where to put an error code @@ -1330,6 +1232,8 @@ uint32_t n = 0; PCRE2_SPTR ptr = *ptrptr; BOOL yield = FALSE; +PCRE2_ASSERT(max_value <= INT_MAX/10 - 1); + *errorcodeptr = 0; if (allow_sign >= 0 && ptr < ptrend) @@ -1350,10 +1254,11 @@ if (allow_sign >= 0 && ptr < ptrend) if (ptr >= ptrend || !IS_DIGIT(*ptr)) return FALSE; while (ptr < ptrend && IS_DIGIT(*ptr)) { - n = n * 10 + *ptr++ - CHAR_0; + n = n * 10 + (*ptr++ - CHAR_0); if (n > max_value) { *errorcodeptr = max_error; + while (ptr < ptrend && IS_DIGIT(*ptr)) ptr++; goto EXIT; } } @@ -1367,7 +1272,7 @@ if (allow_sign >= 0 && sign != 0) } if (sign > 0) n += allow_sign; - else if ((int)n > allow_sign) + else if (n > (uint32_t)allow_sign) { *errorcodeptr = ERR15; /* Non-existent subpattern */ goto EXIT; @@ -1454,7 +1359,7 @@ else if (pp >= ptrend || *pp != CHAR_RIGHT_CURLY_BRACKET) return FALSE; } -/* Now process the quantifier for real. We know it must be {n} or (n,} or {,m} +/* Now process the quantifier for real. We know it must be {n} or {n,} or {,m} or {n,m}. The only error that read_number() can return is for a number that is too big. If *errorcodeptr is returned as zero it means no number was found. */ @@ -1521,15 +1426,15 @@ return yield; /* This function is called when a \ has been encountered. 
It either returns a positive value for a simple escape such as \d, or 0 for a data character, which -is placed in chptr. A backreference to group n is returned as negative n. On +is placed in chptr. A backreference to group n is returned as -(n+1). On entry, ptr is pointing at the character after \. On exit, it points after the final code unit of the escape sequence. This function is also called from pcre2_substitute() to handle escape sequences in replacement strings. In this case, the cb argument is NULL, and in the case of escapes that have further processing, only sequences that define a data -character are recognised. The isclass argument is not relevant; the options -argument is the final value of the compiled pattern's options. +character are recognised. The options argument is the final value of the +compiled pattern's options. Arguments: ptrptr points to the input position pointer @@ -1538,7 +1443,8 @@ Arguments: errorcodeptr points to the errorcode variable (containing zero) options the current options bits xoptions the current extra options bits - isclass TRUE if inside a character class + bracount the number of capturing parentheses encountered so far + isclass TRUE if in a character class cb compile data block or NULL when called from pcre2_substitute() Returns: zero => a data character @@ -1549,8 +1455,8 @@ Returns: zero => a data character int PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, - int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass, - compile_block *cb) + int *errorcodeptr, uint32_t options, uint32_t xoptions, uint32_t bracount, + BOOL isclass, compile_block *cb) { BOOL utf = (options & PCRE2_UTF) != 0; BOOL alt_bsux = @@ -1619,17 +1525,23 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS) { -#ifdef EBCDIC - *errorcodeptr = ERR93; -#else +#ifndef EBCDIC if (utf) { ptr = p + 2; escape = 0; /* Not a fancy escape after all */ goto 
COME_FROM_NU; } - else *errorcodeptr = ERR93; #endif + *errorcodeptr = ERR93; + } + + /* Give an error in contexts where quantifiers are not allowed + (character classes; substitution strings). */ + + else if (isclass || cb == NULL) + { + *errorcodeptr = ERR37; } /* Give an error if what follows is not a quantifier, but don't override @@ -1660,7 +1572,8 @@ else if (cb == NULL) { - if (c != CHAR_c && c != CHAR_o && c != CHAR_x) + if (c < CHAR_0 || + (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x && c != CHAR_g))) { *errorcodeptr = ERR3; return 0; @@ -1715,6 +1628,7 @@ else hptr >= ptrend || /* Hit end of input */ *hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */ { + if (isclass) break; /* In a class, just treat as '\u' literal */ escape = ESC_ub; /* Special return */ ptr++; /* Skip { */ break; /* Hex escape not recognized */ @@ -1773,8 +1687,14 @@ else (possibly recursive) subroutine calls, _not_ backreferences. We return the ESC_g code. - Summary: Return a negative number for a numerical back reference, ESC_k for - a named back reference, and ESC_g for a named or numbered subroutine call. + Summary: Return a negative number for a numerical back reference (offset + by 1), ESC_k for a named back reference, and ESC_g for a named or + numbered subroutine call. + + The above describes the \g behaviour inside patterns. Inside replacement + strings (pcre2_substitute) we support only \g<nameornum> for Python + compatibility. Return ESC_g for the named case, and -(num+1) for the + numbered case.
*/ case CHAR_g: @@ -1786,6 +1706,40 @@ else break; } + if (cb == NULL) + { + PCRE2_SPTR p; + /* Substitution strings */ + if (*ptr != CHAR_LESS_THAN_SIGN) + { + *errorcodeptr = ERR57; + break; + } + + p = ptr + 1; + + if (!read_number(&p, ptrend, -1, MAX_GROUP_NUMBER, ERR61, &s, + errorcodeptr)) + { + if (*errorcodeptr == 0) escape = ESC_g; /* No number found */ + break; + } + + if (p >= ptrend || *p != CHAR_GREATER_THAN_SIGN) + { + /* not advancing ptr; report error at the \g character */ + *errorcodeptr = ERR57; + break; + } + + /* This is the reason that back references are returned as -(s+1) rather + than just -s. In a pattern, \0 is not a back reference, but \g<0> is + valid in a substitution string, so this must be representable. */ + ptr = p + 1; + escape = -(s+1); + break; + } + if (*ptr == CHAR_LESS_THAN_SIGN || *ptr == CHAR_APOSTROPHE) { escape = ESC_g; @@ -1800,7 +1754,7 @@ else PCRE2_SPTR p = ptr + 1; while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; - if (!read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + if (!read_number(&p, ptrend, bracount, MAX_GROUP_NUMBER, ERR61, &s, errorcodeptr)) { if (*errorcodeptr == 0) escape = ESC_k; /* No number found */ @@ -1810,6 +1764,7 @@ else if (p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) { + /* not advancing ptr; report error at the \g character */ *errorcodeptr = ERR57; break; } @@ -1820,7 +1775,7 @@ else else { - if (!read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + if (!read_number(&ptr, ptrend, bracount, MAX_GROUP_NUMBER, ERR61, &s, errorcodeptr)) { if (*errorcodeptr == 0) *errorcodeptr = ERR57; /* No number found */ @@ -1834,7 +1789,7 @@ else break; } - escape = -s; + escape = -(s+1); break; /* The handling of escape sequences consisting of a string of digits @@ -1846,7 +1801,16 @@ else number is less than 10, or if there are that many previous extracting left brackets, it is a back reference. 
Otherwise, up to three octal digits are read to form an escaped character code. Thus \123 is likely to be octal 123 - (cf \0123, which is octal 012 followed by the literal 3). + (cf \0123, which is octal 012 followed by the literal 3). This is the "Perl + style" of handling ambiguous octal/backreferences such as \12. + + There is an alternative disambiguation strategy, selected by + PCRE2_EXTRA_PYTHON_OCTAL, which follows Python's behaviour. An octal must + have either a leading zero, or exactly three octal digits; otherwise it's + a backreference. The disambiguation is stable, and does not depend on how + many capture groups are defined (it's simply an invalid backreference if + there is no corresponding capture group). Additionally, octal values above + \377 (\xff) are rejected. Inside a character class, \ followed by a digit is always either a literal 8 or 9 or an octal number. */ @@ -1854,24 +1818,65 @@ else case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: - if (!isclass) + if (isclass) + { + /* Fall through to octal handling; never a backreference inside a class. */ + } + else if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0) + { + /* Python-style disambiguation. */ + if (ptr[-1] <= CHAR_7 && ptr + 1 < ptrend && ptr[0] >= CHAR_0 && + ptr[0] <= CHAR_7 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) + { + /* We peeked a three-digit octal, so fall through */ + } + else + { + /* We are at a digit, so the only possible error from read_number() is + a number that is too large. */ + ptr--; /* Back to the digit */ + + if (!read_number(&ptr, ptrend, -1, MAX_GROUP_NUMBER, 0, &s, errorcodeptr)) + { + *errorcodeptr = ERR61; + break; + } + + escape = -(s+1); + break; + } + } + else { + /* Perl-style disambiguation. */ oldptr = ptr; ptr--; /* Back to the digit */ /* As we know we are at a digit, the only possible error from - read_number() is a number that is too large to be a group number.
In this - case we fall through handle this as not a group reference. If we have - read a small enough number, check for a back reference. + read_number() is a number that is too large to be a group number. Because + that number might be still valid if read as an octal, errorcodeptr is not + set on failure and therefore a sentinel value of INT_MAX is used instead + of the original value, and will be used later to properly set the error, + if not falling through. */ + + if (!read_number(&ptr, ptrend, -1, MAX_GROUP_NUMBER, 0, &s, errorcodeptr)) + s = INT_MAX; - \1 to \9 are always back references. \8x and \9x are too; \1x to \7x + /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x are octal escapes if there are not that many previous captures. */ - if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) && - (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)) + if (s < 10 || c >= CHAR_8 || (unsigned)s <= bracount) { - if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; - else escape = -s; /* Indicates a back reference */ + /* s > MAX_GROUP_NUMBER should not be possible because of read_number(), + but we keep it just to be safe and because it will also catch the + sentinel value that was set on failure by that function. */ + + if ((unsigned)s > MAX_GROUP_NUMBER) + { + PCRE2_ASSERT(s == INT_MAX); + *errorcodeptr = ERR61; + } + else escape = -(s+1); /* Indicates a back reference */ break; } @@ -1890,16 +1895,26 @@ else /* \0 always starts an octal number, but we may drop through to here with a larger first octal digit. The original code used just to take the least significant 8 bits of octal numbers (I think this is what early Perls used - to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, + to do). Nowadays we allow for larger numbers in UTF-8 mode and 16/32-bit mode, but no more than 3 octal digits. 
*/ case CHAR_0: c -= CHAR_0; while(i++ < 2 && ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) c = c * 8 + *ptr++ - CHAR_0; + if (c > 0xff) + { + if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0) *errorcodeptr = ERR102; #if PCRE2_CODE_UNIT_WIDTH == 8 - if (!utf && c > 0xff) *errorcodeptr = ERR51; + else if (!utf) *errorcodeptr = ERR51; #endif + } + + /* PCRE2_EXTRA_NO_BS0 disables the NUL escape '\0' but doesn't affect + two- or three-character octal escapes \00 and \000, nor \x00. */ + + if ((xoptions & PCRE2_EXTRA_NO_BS0) != 0 && c == 0 && i == 1) + *errorcodeptr = ERR98; break; /* \o is a relatively new Perl feature, supporting a more general way of @@ -1928,7 +1943,7 @@ else cc = *ptr++; if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ #if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x20000000l) { overflow = TRUE; break; } + if (c >= 0x20000000u) { overflow = TRUE; break; } #endif c = (c << 3) + (cc - CHAR_0); #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -2054,10 +2069,29 @@ else else { - c = 0; - if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ + /* Perl has the surprising/broken behaviour that \x without following + hex digits is treated as an escape for NUL. Their source code laments + this but keeps it for backwards compatibility. A warning is printed + when "use warnings" is enabled. Because we don't have warnings, we + simply forbid it. */ + if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) + { + /* Not a hex digit */ + *errorcodeptr = ERR78; + break; + } ptr++; c = cc; + + /* With "use re 'strict'" Perl actually requires exactly two digits (error + for \x, \xA and \xAAA). While \x was already rejected, this seems overly + strict, and there seems little incentive to align with that, given the + backwards-compatibility cost. + + For comparison, note that other engines disagree. For example: + - Java allows 1 or 2 hex digits. Error if 0 digits. No error if >2 digits + - .NET requires 2 hex digits. Error if 0, 1 digits. 
No error if >2 digits. + */ if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ ptr++; c = (c << 4) | cc; @@ -2179,37 +2213,65 @@ c = *ptr++; *negptr = FALSE; /* \P or \p can be followed by a name in {}, optionally preceded by ^ for -negation. */ +negation. We must be handling Unicode encoding here, though we may be compiling +for UTF-8 input in an EBCDIC environment. (PCRE2 does not support both EBCDIC +input and Unicode input in the same build.) In accordance with Unicode's "loose +matching" rules, ASCII white space, hyphens, and underscores are ignored. We +don't use isspace() or tolower() because (a) code points may be greater than +255, and (b) they wouldn't work when compiling for Unicode in an EBCDIC +environment. */ if (c == CHAR_LEFT_CURLY_BRACKET) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; - if (*ptr == CHAR_CIRCUMFLEX_ACCENT) - { - *negptr = TRUE; - ptr++; - } - for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) { + REDO: + if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; -#if PCRE2_CODE_UNIT_WIDTH != 8 - while (c == '_' || c == '-' || (c <= 0xff && isspace(c))) -#else - while (c == '_' || c == '-' || isspace(c)) -#endif + + /* Skip ignorable Unicode characters. */ + + while (c == CHAR_UNDERSCORE || c == CHAR_MINUS || c == CHAR_SPACE || + (c >= CHAR_HT && c <= CHAR_CR)) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; } - if (c == CHAR_NUL) goto ERROR_RETURN; + + /* The first significant character being circumflex negates the meaning of + the item. */ + + if (i == 0 && !*negptr && c == CHAR_CIRCUMFLEX_ACCENT) + { + *negptr = TRUE; + goto REDO; + } + if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = tolower(c); - if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i; + + /* Names consist of ASCII letters and digits, but equals and colon may also + occur as a name/value separator. We must also allow for \p{L&}. 
A simple + check for a value between '&' and 'z' suffices because anything else in a + name or value will cause an "unknown property" error anyway. */ + + if (c < CHAR_AMPERSAND || c > CHAR_z) goto ERROR_RETURN; + + /* Lower case a capital letter or remember where the name/value separator + is. */ + + if (c >= CHAR_A && c <= CHAR_Z) c |= 0x20; + else if ((c == CHAR_COLON || c == CHAR_EQUALS_SIGN) && vptr == NULL) + vptr = name + i; + + name[i] = c; } + /* Error if the loop didn't end with '}' - either we hit the end of the + pattern or the name was longer than any legal property name. */ + if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; name[i] = 0; } @@ -2217,14 +2279,19 @@ if (c == CHAR_LEFT_CURLY_BRACKET) /* If { doesn't follow \p or \P there is just one following character, which must be an ASCII letter. */ -else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) +else if (c >= CHAR_A && c <= CHAR_Z) + { + name[0] = c | 0x20; /* Lower case */ + name[1] = 0; + } +else if (c >= CHAR_a && c <= CHAR_z) { - name[0] = tolower(c); + name[0] = c; name[1] = 0; } else goto ERROR_RETURN; -*ptrptr = ptr; +*ptrptr = ptr; /* Update pattern pointer */ /* If the property contains ':' or '=' we have class name and value separately specified. The following are supported: @@ -2482,6 +2549,9 @@ if (ptr >= ptrend) /* No characters in name */ *nameptr = ptr; *offsetptr = (PCRE2_SIZE)(ptr - cb->start_pattern); +/* If this logic were ever to change, the matching function in pcre2_substitute.c +ought to be updated to match. */ + /* In UTF mode, a group name may contain letters and decimal digits as defined by Unicode properties, and underscores, but must not start with a digit. 
*/ @@ -2700,6 +2770,60 @@ return parsed_pattern; +/************************************************* +* Maximum size of parsed_pattern for given input * +*************************************************/ + +/* This function is called from parse_regex() below, to determine the amount +of memory to allocate for parsed_pattern. It is also called to check whether +the amount of data written respects the amount of memory allocated. + +Arguments: + ptr points to the start of the pattern + ptrend points to the end of the pattern + utf TRUE in UTF mode + options the options bits + +Returns: the number of uint32_t units for parsed_pattern +*/ +static ptrdiff_t +max_parsed_pattern(PCRE2_SPTR ptr, PCRE2_SPTR ptrend, BOOL utf, + uint32_t options) +{ +PCRE2_SIZE big32count = 0; +ptrdiff_t parsed_size_needed; + +/* When PCRE2_AUTO_CALLOUT is not set, in all but one case the number of +unsigned 32-bit ints written out to the parsed pattern is bounded by the length +of the pattern. The exceptional case is when running in 32-bit, non-UTF mode, +when literal characters greater than META_END (0x80000000) have to be coded as +two units. In this case, therefore, we scan the pattern to check for such +values. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!utf) + { + PCRE2_SPTR p; + for (p = ptr; p < ptrend; p++) if (*p >= META_END) big32count++; + } +#else +(void)utf; /* Avoid compiler warning */ +#endif + +parsed_size_needed = (ptrend - ptr) + big32count; + +/* When PCRE2_AUTO_CALLOUT is set we have to assume a numerical callout (4 +elements) for each character. This is overkill, but memory is plentiful these +days. */ + +if ((options & PCRE2_AUTO_CALLOUT) != 0) + parsed_size_needed += (ptrend - ptr) * 4; + +return parsed_size_needed; +} + + + /************************************************* * Parse regex and identify named groups * *************************************************/ @@ -2751,7 +2875,33 @@ the main compiling phase. 
*/ /* States used for analyzing ranges in character classes. The two OK values must be last. */ -enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL }; +enum { + RANGE_NO, /* State after '[' (initial), or '[a-z'; hyphen is literal */ + RANGE_STARTED, /* State after '[1-'; last-emitted code is META_RANGE_XYZ */ + RANGE_FORBID_NO, /* State after '[\d'; '-]' is allowed but not '-1]' */ + RANGE_FORBID_STARTED, /* State after '[\d-'*/ + RANGE_OK_ESCAPED, /* State after '[\1'; hyphen may be a range */ + RANGE_OK_LITERAL /* State after '[1'; hyphen may be a range */ +}; + +/* States used for analyzing operators and operands in extended character +classes. */ + +enum { + CLASS_OP_EMPTY, /* At start of an expression; empty previous contents */ + CLASS_OP_OPERAND, /* Have preceding operand; after "z" a "--" can follow */ + CLASS_OP_OPERATOR /* Have preceding operator; after "--" operand must follow */ +}; + +/* States used for determining the parse mode in character classes. The two +PERL_EXT values must be last. */ + +enum { + CLASS_MODE_NORMAL, /* Ordinary PCRE2 '[...]' class. */ + CLASS_MODE_ALT_EXT, /* UTS#18-style extended '[...]' class. */ + CLASS_MODE_PERL_EXT, /* Perl extended '(?[...])' class. */ + CLASS_MODE_PERL_EXT_LEAF /* Leaf within extended '(?[ [...] ])' class. */ +}; /* Only in 32-bit mode can there be literals > META_END. A macro encapsulates the storing of literal values in the main parsed pattern, where they can always @@ -2770,13 +2920,16 @@ be quantified. */ /* Here's the actual function. 
*/ -static int parse_regex(PCRE2_SPTR ptr, uint32_t options, BOOL *has_lookbehind, - compile_block *cb) +static int parse_regex(PCRE2_SPTR ptr, uint32_t options, uint32_t xoptions, + BOOL *has_lookbehind, compile_block *cb) { uint32_t c; uint32_t delimiter; uint32_t namelen; uint32_t class_range_state; +uint32_t class_op_state; +uint32_t class_mode_state; +uint32_t *class_start; uint32_t *verblengthptr = NULL; /* Value avoids compiler warning */ uint32_t *verbstartptr = NULL; uint32_t *previous_callout = NULL; @@ -2786,8 +2939,9 @@ uint32_t *this_parsed_item = NULL; uint32_t *prev_parsed_item = NULL; uint32_t meta_quantifier = 0; uint32_t add_after_mark = 0; -uint32_t xoptions = cb->cx->extra_options; uint16_t nest_depth = 0; +int16_t class_depth_m1 = -1; /* The m1 means minus 1. */ +int16_t class_maxdepth_m1 = -1; int after_manual_callout = 0; int expect_cond_assert = 0; int errorcode = 0; @@ -2804,8 +2958,17 @@ PCRE2_SPTR thisptr; PCRE2_SPTR name; PCRE2_SPTR ptrend = cb->end_pattern; PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */ +PCRE2_SPTR class_range_forbid_ptr = NULL; named_group *ng; nest_save *top_nest, *end_nests; +#ifdef PCRE2_DEBUG +uint32_t *parsed_pattern_check; +ptrdiff_t parsed_pattern_extra = 0; +ptrdiff_t parsed_pattern_extra_check = 0; +PCRE2_SPTR ptr_check; +#endif + +PCRE2_ASSERT(parsed_pattern != NULL); /* Insert leading items for word and line matching (features provided for the benefit of pcre2grep). */ @@ -2821,6 +2984,11 @@ else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) *parsed_pattern++ = META_NOCAPTURE; } +#ifdef PCRE2_DEBUG +parsed_pattern_check = parsed_pattern; +ptr_check = ptr; +#endif + /* If the pattern is actually a literal string, process it separately to avoid cluttering up the main loop. 
*/ @@ -2830,6 +2998,7 @@ if ((options & PCRE2_LITERAL) != 0) { if (parsed_pattern >= parsed_pattern_end) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR63; /* Internal error (parsed pattern overflow) */ goto FAILED; } @@ -2873,18 +3042,40 @@ while (ptr < ptrend) PCRE2_SPTR tempptr; PCRE2_SIZE offset; - if (parsed_pattern >= parsed_pattern_end) - { - errorcode = ERR63; /* Internal error (parsed pattern overflow) */ - goto FAILED; - } - if (nest_depth > cb->cx->parens_nest_limit) { errorcode = ERR19; goto FAILED; /* Parentheses too deeply nested */ } + /* Check that we haven't emitted too much into parsed_pattern. We allocate + a suitably-sized buffer upfront, then do unchecked writes to it. If we only + write a little bit too much, everything will appear to be OK, because the + upfront size is an overestimate... but a malicious pattern could end up + forcing a write past the buffer end. We must catch this during + development. */ + +#ifdef PCRE2_DEBUG + /* Strong post-write check. Won't help in release builds - at this point + the write has already occurred so it's too late. However, should stop us + committing unsafe code. */ + PCRE2_ASSERT((parsed_pattern - parsed_pattern_check) + + (parsed_pattern_extra - parsed_pattern_extra_check) <= + max_parsed_pattern(ptr_check, ptr, utf, options)); + parsed_pattern_check = parsed_pattern; + parsed_pattern_extra_check = parsed_pattern_extra; + ptr_check = ptr; +#endif + + if (parsed_pattern >= parsed_pattern_end) + { + /* Weak pre-write check; only ensures parsed_pattern[0] is writeable + (but the code below can write many chars). Better than nothing. */ + PCRE2_DEBUG_UNREACHABLE(); + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ + goto FAILED; + } + /* If the last time round this loop something was added, parsed_pattern will no longer be equal to this_parsed_item. Remember where the previous item started and reset for the next item. 
Note that sometimes round the loop, @@ -3004,7 +3195,7 @@ while (ptr < ptrend) if ((options & PCRE2_ALT_VERBNAMES) != 0) { escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, - xoptions, FALSE, cb); + xoptions, cb->bracount, FALSE, cb); if (errorcode != 0) goto FAILED; } else escape = 0; /* Treat all as literal */ @@ -3204,7 +3395,7 @@ while (ptr < ptrend) case CHAR_BACKSLASH: tempptr = ptr; escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, - xoptions, FALSE, cb); + xoptions, cb->bracount, FALSE, cb); if (errorcode != 0) { ESCAPE_FAILED: @@ -3235,7 +3426,7 @@ while (ptr < ptrend) else if (escape < 0) { offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 1); - escape = -escape; + escape = -escape - 1; *parsed_pattern++ = META_BACKREF | (uint32_t)escape; if (escape < 10) { @@ -3347,7 +3538,7 @@ while (ptr < ptrend) /* When \g is used with quotes or angle brackets as delimiters, it is a numerical or named subroutine call, and control comes here. When used - with brace delimiters it is a numberical back reference and does not come + with brace delimiters it is a numerical back reference and does not come here because check_escape() returns it directly as a reference. \k is always a named back reference. */ @@ -3458,7 +3649,7 @@ while (ptr < ptrend) if (!prev_okquantifier) { errorcode = ERR9; - goto FAILED_BACK; + goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 } /* Most (*VERB)s are not allowed to be quantified, but an ungreedy @@ -3474,6 +3665,11 @@ while (ptr < ptrend) *verbstartptr = META_NOCAPTURE; parsed_pattern[1] = META_KET; parsed_pattern += 2; + +#ifdef PCRE2_DEBUG + PCRE2_ASSERT(parsed_pattern_extra >= 2); + parsed_pattern_extra -= 2; +#endif } /* Now we can put the quantifier into the parsed pattern vector. 
At this @@ -3493,7 +3689,6 @@ while (ptr < ptrend) /* ---- Character class ---- */ case CHAR_LEFT_SQUARE_BRACKET: - okquantifier = TRUE; /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is used for "start of word" and "end of word". As these are otherwise illegal @@ -3531,6 +3726,7 @@ while (ptr < ptrend) } *parsed_pattern++ = META_KET; ptr += 6; + okquantifier = TRUE; break; } @@ -3545,46 +3741,14 @@ while (ptr < ptrend) goto FAILED; } - /* Process a regular character class. If the first character is '^', set - the negation flag. If the first few characters (either before or after ^) - are \Q\E or \E or space or tab in extended-more mode, we skip them too. - This makes for compatibility with Perl. */ - - negate_class = FALSE; - while (ptr < ptrend) - { - GETCHARINCTEST(c, ptr); - if (c == CHAR_BACKSLASH) - { - if (ptr < ptrend && *ptr == CHAR_E) ptr++; - else if (ptrend - ptr >= 3 && - PRIV(strncmp_c8)(ptr, STR_Q STR_BACKSLASH STR_E, 3) == 0) - ptr += 3; - else - break; - } - else if ((options & PCRE2_EXTENDED_MORE) != 0 && - (c == CHAR_SPACE || c == CHAR_HT)) /* Note: just these two */ - continue; - else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) - negate_class = TRUE; - else break; - } - - /* Now the real contents of the class; c has the first "real" character. - Empty classes are permitted only if the option is set. */ - - if (c == CHAR_RIGHT_SQUARE_BRACKET && - (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) - { - *parsed_pattern++ = negate_class? META_CLASS_EMPTY_NOT : META_CLASS_EMPTY; - break; /* End of class processing */ - } + class_mode_state = ((options & PCRE2_ALT_EXTENDED_CLASS) != 0)? + CLASS_MODE_ALT_EXT : CLASS_MODE_NORMAL; - /* Process a non-empty class. */ + /* Jump here from '(?[...])'. That jump must initialize class_mode_state, + set c to the '[' character, and ptr to just after the '['. */ - *parsed_pattern++ = negate_class? 
META_CLASS_NOT : META_CLASS; - class_range_state = RANGE_NO; + FROM_PERL_EXTENDED_CLASS: + okquantifier = TRUE; /* In an EBCDIC environment, Perl treats alphabetic ranges specially because there are holes in the encoding, and simply using the range A-Z @@ -3594,7 +3758,16 @@ while (ptr < ptrend) character values are literal or not, and a state variable for handling ranges. */ - /* Loop for the contents of the class */ + /* Loop for the contents of the class. Classes may be nested, if + PCRE2_ALT_EXTENDED_CLASS is set, or the class is of the form (?[...]). */ + + /* c is still set to '[' so the loop will handle the start of the class. */ + + class_depth_m1 = -1; + class_maxdepth_m1 = -1; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_EMPTY; + class_start = NULL; for (;;) { @@ -3610,13 +3783,26 @@ while (ptr < ptrend) ptr++; /* Skip the 'E' */ goto CLASS_CONTINUE; } + + /* Surprisingly, you cannot use \Q..\E to escape a character inside a + Perl extended class. However, empty \Q\E sequences are allowed, so here + we're only giving an error if the \Q..\E is non-empty. */ + + if (class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR116; + goto FAILED; + } + goto CLASS_LITERAL; } - /* Skip over space and tab (only) in extended-more mode. */ + /* Skip over space and tab (only) in extended-more mode, or anywhere + inside a Perl extended class (which implies /xx). */ - if ((options & PCRE2_EXTENDED_MORE) != 0 && - (c == CHAR_SPACE || c == CHAR_HT)) + if ((c == CHAR_SPACE || c == CHAR_HT) && + ((options & PCRE2_EXTENDED_MORE) != 0 || + class_mode_state >= CLASS_MODE_PERL_EXT)) goto CLASS_CONTINUE; /* Handle POSIX class names. Perl allows a negation extension of the @@ -3625,7 +3811,8 @@ while (ptr < ptrend) [.ch.] and [=ch=] ("collating elements") and fault them, as Perl 5.6 and 5.8 do.
*/ - if (c == CHAR_LEFT_SQUARE_BRACKET && + if (class_depth_m1 >= 0 && + c == CHAR_LEFT_SQUARE_BRACKET && ptrend - ptr >= 3 && (*ptr == CHAR_COLON || *ptr == CHAR_DOT || *ptr == CHAR_EQUALS_SIGN) && @@ -3641,14 +3828,41 @@ while (ptr < ptrend) if (class_range_state == RANGE_STARTED) { + ptr = tempptr + 2; + errorcode = ERR50; + goto FAILED; + } + + /* Perl treats a hyphen after a POSIX class as a literal, not the + start of a range. However, it gives a warning in its warning mode + unless the hyphen is the last character in the class. PCRE does not + have a warning mode, so we give an error, because this is likely an + error on the user's part. + + Roll back to the hyphen for the error position. */ + + if (class_range_state == RANGE_FORBID_STARTED) + { + ptr = class_range_forbid_ptr; errorcode = ERR50; goto FAILED; } + /* Disallow implicit union in Perl extended classes. */ + + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) + { + ptr = tempptr + 2; + errorcode = ERR113; + goto FAILED; + } + if (*ptr != CHAR_COLON) { + ptr = tempptr + 2; errorcode = ERR13; - goto FAILED_BACK; + goto FAILED; } if (*(++ptr) == CHAR_CIRCUMFLEX_ACCENT) @@ -3658,33 +3872,19 @@ while (ptr < ptrend) } posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); + ptr = tempptr + 2; if (posix_class < 0) { errorcode = ERR30; goto FAILED; } - ptr = tempptr + 2; - - /* Perl treats a hyphen after a POSIX class as a literal, not the - start of a range. However, it gives a warning in its warning mode - unless the hyphen is the last character in the class. PCRE does not - have a warning mode, so we give an error, because this is likely an - error on the user's part. 
*/ - - if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && - ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) - { - errorcode = ERR50; - goto FAILED; - } - /* Set "a hyphen is not the start of a range" for the -] case, and also - in case the POSIX class is followed by \E or \Q\E (possibly repeated - - fuzzers do that kind of thing) and *then* a hyphen. This causes that - hyphen to be treated as a literal. I don't think it's worth setting up - special apparatus to do otherwise. */ + /* Set "a hyphen is forbidden to be the start of a range". For the '-]' + case, the hyphen is treated as a literal, but for '-1' it is disallowed + (because it would be interpreted as range). */ - class_range_state = RANGE_NO; + class_range_state = RANGE_FORBID_NO; + class_op_state = CLASS_OP_OPERAND; /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some of the POSIX classes are converted to use Unicode properties \p or \P @@ -3727,56 +3927,344 @@ while (ptr < ptrend) *parsed_pattern++ = posix_class; } - /* Handle potential start of range */ + /* Check for the start of the outermost class, or the start of a nested class. */ - else if (c == CHAR_MINUS && class_range_state >= RANGE_OK_ESCAPED) + else if ((c == CHAR_LEFT_SQUARE_BRACKET && + (class_depth_m1 < 0 || class_mode_state == CLASS_MODE_ALT_EXT || + class_mode_state == CLASS_MODE_PERL_EXT)) || + (c == CHAR_LEFT_PARENTHESIS && + class_mode_state == CLASS_MODE_PERL_EXT)) { - *parsed_pattern++ = (class_range_state == RANGE_OK_LITERAL)? - META_RANGE_LITERAL : META_RANGE_ESCAPED; - class_range_state = RANGE_STARTED; - } + uint32_t start_c = c; + uint32_t new_class_mode_state; - /* Handle a literal character */ + /* Update the class mode, if moving into a 'leaf' inside a Perl extended + class. 
*/ + + if (start_c == CHAR_LEFT_SQUARE_BRACKET && + class_mode_state == CLASS_MODE_PERL_EXT && class_depth_m1 >= 0) + new_class_mode_state = CLASS_MODE_PERL_EXT_LEAF; + else + new_class_mode_state = class_mode_state; + + /* Tidy up the other class before starting the nested class. */ + /* -[ beginning a nested class is a literal '-' */ - else if (c != CHAR_BACKSLASH) - { - CLASS_LITERAL: if (class_range_state == RANGE_STARTED) + parsed_pattern[-1] = CHAR_MINUS; + + /* Disallow implicit union in Perl extended classes. */ + + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) { - if (c == parsed_pattern[-2]) /* Optimize one-char range */ - parsed_pattern--; - else if (parsed_pattern[-2] > c) /* Check range is in order */ + errorcode = ERR113; + goto FAILED; + } + + /* Validate nesting depth */ + if (class_depth_m1 >= ECLASS_NEST_LIMIT - 1) + { + errorcode = ERR107; + goto FAILED; /* Classes too deeply nested */ + } + + /* Process the character class start. If the first character is '^', set + the negation flag. If the first few characters (either before or after ^) + are \Q\E or \E or space or tab in extended-more mode, we skip them too. + This makes for compatibility with Perl. 
*/ + + negate_class = FALSE; + for (;;) + { + if (ptr >= ptrend) { - errorcode = ERR8; - goto FAILED_BACK; + if (start_c == CHAR_LEFT_PARENTHESIS) + errorcode = ERR14; /* Missing terminating ')' */ + else + errorcode = ERR6; /* Missing terminating ']' */ + goto FAILED; } - else + + GETCHARINCTEST(c, ptr); + if (new_class_mode_state == CLASS_MODE_PERL_EXT) break; + else if (c == CHAR_BACKSLASH) { - if (!char_is_literal && parsed_pattern[-1] == META_RANGE_LITERAL) - parsed_pattern[-1] = META_RANGE_ESCAPED; - PARSED_LITERAL(c, parsed_pattern); + if (ptr < ptrend && *ptr == CHAR_E) ptr++; + else if (ptrend - ptr >= 3 && + PRIV(strncmp_c8)(ptr, STR_Q STR_BACKSLASH STR_E, 3) == 0) + ptr += 3; + else + break; } - class_range_state = RANGE_NO; + else if ((c == CHAR_SPACE || c == CHAR_HT) && /* Note: just these two */ + ((options & PCRE2_EXTENDED_MORE) != 0 || + new_class_mode_state >= CLASS_MODE_PERL_EXT)) + continue; + else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) + negate_class = TRUE; + else break; } - else /* Potential start of range */ + + /* Now the real contents of the class; c has the first "real" character. + Empty classes are permitted only if the option is set, and if it's not + a Perl-extended class. */ + + if (c == CHAR_RIGHT_SQUARE_BRACKET && + (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0 && + new_class_mode_state < CLASS_MODE_PERL_EXT) { - class_range_state = char_is_literal? - RANGE_OK_LITERAL : RANGE_OK_ESCAPED; + PCRE2_ASSERT(start_c == CHAR_LEFT_SQUARE_BRACKET); + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + *parsed_pattern++ = negate_class? META_CLASS_EMPTY_NOT : META_CLASS_EMPTY; + + /* Leave nesting depth unchanged; but check for zero depth to handle the + very first (top-level) class being empty. 
*/ + if (class_depth_m1 < 0) break; + + class_range_state = RANGE_NO; /* for processing the containing class */ + class_op_state = CLASS_OP_OPERAND; + goto CLASS_CONTINUE; + } + + /* Enter a non-empty class. */ + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + class_start = parsed_pattern; + *parsed_pattern++ = negate_class? META_CLASS_NOT : META_CLASS; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_EMPTY; + class_mode_state = new_class_mode_state; + ++class_depth_m1; + if (class_maxdepth_m1 < class_depth_m1) + class_maxdepth_m1 = class_depth_m1; + /* Reset; no op seen yet at new depth. */ + cb->class_op_used[class_depth_m1] = 0; + + /* Implement the special start-of-class literal meaning of ']'. */ + if (c == CHAR_RIGHT_SQUARE_BRACKET && + new_class_mode_state != CLASS_MODE_PERL_EXT) + { + class_range_state = RANGE_OK_LITERAL; + class_op_state = CLASS_OP_OPERAND; PARSED_LITERAL(c, parsed_pattern); + goto CLASS_CONTINUE; + } + + continue; /* We have already loaded c with the next character */ + } + + /* Check for the end of the class. */ + + else if (c == CHAR_RIGHT_SQUARE_BRACKET || + (c == CHAR_RIGHT_PARENTHESIS && class_mode_state == CLASS_MODE_PERL_EXT)) + { + /* In Perl extended mode, the ']' can only be used to match the + opening '[', and ')' must match an opening parenthesis. */ + if (class_mode_state == CLASS_MODE_PERL_EXT) + { + if (c == CHAR_RIGHT_SQUARE_BRACKET && class_depth_m1 != 0) + { + errorcode = ERR14; + goto FAILED_BACK; + } + if (c == CHAR_RIGHT_PARENTHESIS && class_depth_m1 < 1) + { + errorcode = ERR22; + goto FAILED; + } + } + + /* Check no trailing operator. */ + if (class_op_state == CLASS_OP_OPERATOR) + { + errorcode = ERR110; + goto FAILED; + } + + /* Check no empty expression for Perl extended expressions. 
*/ + if (class_mode_state == CLASS_MODE_PERL_EXT && + class_op_state == CLASS_OP_EMPTY) + { + errorcode = ERR114; + goto FAILED; + } + + /* -] at the end of a class is a literal '-' */ + if (class_range_state == RANGE_STARTED) + parsed_pattern[-1] = CHAR_MINUS; + + *parsed_pattern++ = META_CLASS_END; + + if (--class_depth_m1 < 0) + { + /* Check for and consume ')' after '(?[...]'. */ + PCRE2_ASSERT(class_mode_state != CLASS_MODE_PERL_EXT_LEAF); + if (class_mode_state == CLASS_MODE_PERL_EXT) + { + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR115; + goto FAILED; + } + + ptr++; + } + + break; + } + + class_range_state = RANGE_NO; /* for processing the containing class */ + class_op_state = CLASS_OP_OPERAND; + if (class_mode_state == CLASS_MODE_PERL_EXT_LEAF) + class_mode_state = CLASS_MODE_PERL_EXT; + /* The extended class flag has already + been set for the parent class. */ + class_start = NULL; + } + + /* Handle a Perl set binary operator */ + + else if (class_mode_state == CLASS_MODE_PERL_EXT && + (c == CHAR_PLUS || c == CHAR_VERTICAL_LINE || c == CHAR_MINUS || + c == CHAR_AMPERSAND || c == CHAR_CIRCUMFLEX_ACCENT)) + { + /* Check that there was a preceding operand. */ + if (class_op_state != CLASS_OP_OPERAND) + { + errorcode = ERR109; + goto FAILED; + } + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + PCRE2_ASSERT(class_range_state != RANGE_STARTED && + class_range_state != RANGE_FORBID_STARTED); + + *parsed_pattern++ = c == CHAR_PLUS? META_ECLASS_OR : + c == CHAR_VERTICAL_LINE? META_ECLASS_OR : + c == CHAR_MINUS? META_ECLASS_SUB : + c == CHAR_AMPERSAND? 
META_ECLASS_AND : + META_ECLASS_XOR; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERATOR; + } + + /* Handle a Perl set unary operator */ + + else if (class_mode_state == CLASS_MODE_PERL_EXT && + c == CHAR_EXCLAMATION_MARK) + { + /* Check that the "!" has not got a preceding operand (i.e. it's the + start of the class, or follows an operator). */ + if (class_op_state == CLASS_OP_OPERAND) + { + errorcode = ERR113; + goto FAILED; + } + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + PCRE2_ASSERT(class_range_state != RANGE_STARTED && + class_range_state != RANGE_FORBID_STARTED); + + *parsed_pattern++ = META_ECLASS_NOT; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERATOR; + } + + /* Handle a UTS#18 set operator */ + + else if (class_mode_state == CLASS_MODE_ALT_EXT && + (c == CHAR_VERTICAL_LINE || c == CHAR_MINUS || + c == CHAR_AMPERSAND || c == CHAR_TILDE) && + ptr < ptrend && *ptr == c) + { + ++ptr; + + /* Check there isn't a triple-repetition. */ + if (ptr < ptrend && *ptr == c) + { + while (ptr < ptrend && *ptr == c) ++ptr; /* Improve error offset. */ + errorcode = ERR108; + goto FAILED; + } + + /* Check for a preceding operand. */ + if (class_op_state != CLASS_OP_OPERAND) + { + errorcode = ERR109; + goto FAILED; + } + + /* Check for mixed precedence. Forbid [A--B&&C]. */ + if (cb->class_op_used[class_depth_m1] != 0 && + cb->class_op_used[class_depth_m1] != (uint8_t)c) + { + errorcode = ERR111; + goto FAILED; } + + if (class_start != NULL) + { + PCRE2_ASSERT(class_depth_m1 >= 0); + /* Represents that the class is an extended class. */ + *class_start |= CLASS_IS_ECLASS; + class_start = NULL; + } + + /* Dangling '-' before an operator is a literal */ + if (class_range_state == RANGE_STARTED) + parsed_pattern[-1] = CHAR_MINUS; + + *parsed_pattern++ = c == CHAR_VERTICAL_LINE? 
META_ECLASS_OR : + c == CHAR_MINUS? META_ECLASS_SUB : + c == CHAR_AMPERSAND? META_ECLASS_AND : + META_ECLASS_XOR; + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERATOR; + cb->class_op_used[class_depth_m1] = (uint8_t)c; } /* Handle escapes in a class */ - else + else if (c == CHAR_BACKSLASH) { tempptr = ptr; escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, - xoptions, TRUE, cb); + xoptions, cb->bracount, TRUE, cb); if (errorcode != 0) { - if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) + if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0 || + class_mode_state >= CLASS_MODE_PERL_EXT) goto FAILED; ptr = tempptr; if (ptr >= ptrend) c = CHAR_BACKSLASH; else @@ -3797,6 +4285,11 @@ while (ptr < ptrend) char_is_literal = FALSE; goto CLASS_LITERAL; + case ESC_k: + c = CHAR_k; /* \k is not special in a class, just like \g */ + char_is_literal = FALSE; + goto CLASS_LITERAL; + case ESC_Q: inescq = TRUE; /* Enter literal mode */ goto CLASS_CONTINUE; @@ -3808,29 +4301,10 @@ while (ptr < ptrend) case ESC_R: case ESC_X: errorcode = ERR7; - ptr--; + ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549 goto FAILED; - } - - /* The second part of a range can be a single-character escape - sequence (detected above), but not any of the other escapes. Perl - treats a hyphen as a literal in such circumstances. However, in Perl's - warning mode, a warning is given, so PCRE now faults it, as it is - almost certainly a mistake on the user's part. */ - - if (class_range_state == RANGE_STARTED) - { - errorcode = ERR50; - goto FAILED; /* Not CLASS_ESCAPE_FAILED; always an error */ - } - - /* Of the remaining escapes, only those that define characters are - allowed in a class. None may start a range. 
*/ - class_range_state = RANGE_NO; - switch(escape) - { - case ESC_N: + case ESC_N: /* Not permitted by Perl either */ errorcode = ERR71; goto FAILED; @@ -3864,6 +4338,18 @@ while (ptr < ptrend) uint16_t ptype = 0, pdata = 0; if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb)) goto FAILED; + + /* In caseless matching, particular characteristics Lu, Ll, and Lt + get converted to the general characteristic L&. That is, upper, + lower, and title case letters are all conflated. */ + + if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC && + (pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt)) + { + ptype = PT_LAMP; + pdata = 0; + } + if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P; *parsed_pattern++ = META_ESCAPE + escape; *parsed_pattern++ = (ptype << 16) | pdata; @@ -3874,21 +4360,134 @@ while (ptr < ptrend) #endif break; /* End \P and \p */ - default: /* All others are not allowed in a class */ + /* All others are not allowed in a class */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + /* Fall through */ + + case ESC_A: + case ESC_Z: + case ESC_z: + case ESC_G: + case ESC_K: + case ESC_C: errorcode = ERR7; - ptr--; + ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549 + goto FAILED; + } + + /* All the switch-cases above which end in "break" describe a set + of characters. None may start a range. */ + + /* The second part of a range can be a single-character escape + sequence (detected above), but not any of the other escapes. Perl + treats a hyphen as a literal in such circumstances. However, in Perl's + warning mode, a warning is given, so PCRE now faults it, as it is + almost certainly a mistake on the user's part. */ + + if (class_range_state == RANGE_STARTED) + { + errorcode = ERR50; + goto FAILED; + } + + /* Perl gives a warning unless the hyphen following a multi-character + escape is the last character in the class. PCRE throws an error. 
*/ + + if (class_range_state == RANGE_FORBID_STARTED) + { + ptr = class_range_forbid_ptr; + errorcode = ERR50; + goto FAILED; + } + + /* Disallow implicit union in Perl extended classes. */ + + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR113; goto FAILED; } - /* Perl gives a warning unless a following hyphen is the last character - in the class. PCRE throws an error. */ + class_range_state = RANGE_FORBID_NO; + class_op_state = CLASS_OP_OPERAND; + } + + /* Forbid unescaped literals, and the special meaning of '-', inside a + Perl extended class. */ + + else if (class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR116; + goto FAILED; + } + + /* Handle potential start of range */ + + else if (c == CHAR_MINUS && class_range_state >= RANGE_OK_ESCAPED) + { + *parsed_pattern++ = (class_range_state == RANGE_OK_LITERAL)? + META_RANGE_LITERAL : META_RANGE_ESCAPED; + class_range_state = RANGE_STARTED; + } + + /* Handle forbidden start of range */ + + else if (c == CHAR_MINUS && class_range_state == RANGE_FORBID_NO) + { + *parsed_pattern++ = CHAR_MINUS; + class_range_state = RANGE_FORBID_STARTED; + class_range_forbid_ptr = ptr; + } + + /* Handle a literal character */ + + else + { + CLASS_LITERAL: + + /* Disallow implicit union in Perl extended classes. 
*/ - if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && - ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) + if (class_op_state == CLASS_OP_OPERAND && + class_mode_state == CLASS_MODE_PERL_EXT) + { + errorcode = ERR113; + goto FAILED; + } + + if (class_range_state == RANGE_STARTED) + { + if (c == parsed_pattern[-2]) /* Optimize one-char range */ + parsed_pattern--; + else if (parsed_pattern[-2] > c) /* Check range is in order */ + { + errorcode = ERR8; + goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 + } + else + { + if (!char_is_literal && parsed_pattern[-1] == META_RANGE_LITERAL) + parsed_pattern[-1] = META_RANGE_ESCAPED; + PARSED_LITERAL(c, parsed_pattern); + } + class_range_state = RANGE_NO; + class_op_state = CLASS_OP_OPERAND; + } + else if (class_range_state == RANGE_FORBID_STARTED) { + ptr = class_range_forbid_ptr; errorcode = ERR50; goto FAILED; } + else /* Potential start of range */ + { + class_range_state = char_is_literal? + RANGE_OK_LITERAL : RANGE_OK_ESCAPED; + class_op_state = CLASS_OP_OPERAND; + PARSED_LITERAL(c, parsed_pattern); + } } /* Proceed to next thing in the class. */ @@ -3896,22 +4495,18 @@ while (ptr < ptrend) CLASS_CONTINUE: if (ptr >= ptrend) { - errorcode = ERR6; /* Missing terminating ']' */ + if (class_mode_state == CLASS_MODE_PERL_EXT && class_depth_m1 > 0) + errorcode = ERR14; /* Missing terminating ')' */ + if (class_mode_state == CLASS_MODE_ALT_EXT && + class_depth_m1 == 0 && class_maxdepth_m1 == 1) + errorcode = ERR112; /* Missing terminating ']', but we saw '[ [ ]...' 
*/ + else + errorcode = ERR6; /* Missing terminating ']' */ goto FAILED; } GETCHARINCTEST(c, ptr); - if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; } /* End of class-processing loop */ - /* -] at the end of a class is a literal '-' */ - - if (class_range_state == RANGE_STARTED) - { - parsed_pattern[-1] = CHAR_MINUS; - class_range_state = RANGE_NO; - } - - *parsed_pattern++ = META_CLASS_END; break; /* End of character class */ @@ -3994,8 +4589,7 @@ while (ptr < ptrend) if (prev_expect_cond_assert > 0 && (meta < META_LOOKAHEAD || meta > META_LOOKBEHINDNOT)) { - errorcode = (meta == META_LOOKAHEAD_NA || meta == META_LOOKBEHIND_NA)? - ERR98 : ERR28; /* (Atomic) assertion expected */ + errorcode = ERR28; /* Atomic assertion expected */ goto FAILED; } @@ -4005,6 +4599,7 @@ while (ptr < ptrend) switch(meta) { default: + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR89; /* Unknown code; should never occur because */ goto FAILED; /* the meta values come from a table above. */ @@ -4020,6 +4615,90 @@ while (ptr < ptrend) case META_LOOKAHEADNOT: goto NEGATIVE_LOOK_AHEAD; + case META_SCS: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + if (*ptr != CHAR_LEFT_PARENTHESIS) + { + errorcode = ERR15; + goto FAILED; + } + + ptr++; + *parsed_pattern++ = META_SCS; + /* Temporary variable, zero in the first iteration. */ + offset = 0; + + for (;;) + { + PCRE2_SIZE next_offset = (PCRE2_SIZE)(ptr - cb->start_pattern); + + /* Handle (scan_substring:([+-]number)... 
*/ + if (read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, + &i, &errorcode)) + { + PCRE2_ASSERT(i >= 0); + if (i <= 0) + { + errorcode = ERR15; + goto FAILED; + } + meta = META_SCS_NUMBER; + namelen = (uint32_t)i; + } + else if (errorcode != 0) goto FAILED; /* Number too big */ + else + { + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* Handle (*scan_substring:('name') or (*scan_substring:() */ + if (*ptr == CHAR_LESS_THAN_SIGN) + terminator = CHAR_GREATER_THAN_SIGN; + else if (*ptr == CHAR_APOSTROPHE) + terminator = CHAR_APOSTROPHE; + else + { + errorcode = ERR15; + goto FAILED; + } + + if (!read_name(&ptr, ptrend, utf, terminator, &next_offset, + &name, &namelen, &errorcode, cb)) goto FAILED; + + meta = META_SCS_NAME; + } + + PCRE2_ASSERT(next_offset > 0); + if (offset == 0 || (next_offset - offset) >= 0x10000) + { + *parsed_pattern++ = META_OFFSET; + PUTOFFSET(next_offset, parsed_pattern); + offset = next_offset; + } + + /* The offset is encoded as a relative offset, because for some + inputs such as ",2" in (*scs:(1,2,3)...), we only have space for + two uint32_t values, and an opcode and absolute offset may require + three uint32_t values. */ + *parsed_pattern++ = meta | (uint32_t)(next_offset - offset); + *parsed_pattern++ = namelen; + offset = next_offset; + + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + if (*ptr == CHAR_RIGHT_PARENTHESIS) break; + + if (*ptr != CHAR_COMMA) + { + errorcode = ERR24; + goto FAILED; + } + + ptr++; + } + ptr++; + goto POST_ASSERTION; + case META_LOOKBEHIND: case META_LOOKBEHINDNOT: case META_LOOKBEHIND_NA: @@ -4051,6 +4730,12 @@ while (ptr < ptrend) top_nest->flags = NSF_ATOMICSR; top_nest->options = options & PARSE_TRACKED_OPTIONS; top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + +#ifdef PCRE2_DEBUG + /* We'll write out two META_KETs for a single ")" in the input + pattern, so we reserve space for that in our bounds check. 
*/ + parsed_pattern_extra++; +#endif } break; #else /* SUPPORT_UNICODE */ @@ -4110,6 +4795,11 @@ while (ptr < ptrend) verbstartptr = parsed_pattern; okquantifier = (verbs[i].meta == META_ACCEPT); +#ifdef PCRE2_DEBUG + /* Reserve space in our bounds check for optionally wrapping the (*ACCEPT) + with a non-capturing bracket, if there is a following quantifier. */ + if (okquantifier) parsed_pattern_extra += 2; +#endif /* It appears that Perl allows any characters whatsoever, other than a closing parenthesis, to appear in arguments ("names"), so we no longer @@ -4417,11 +5107,7 @@ while (ptr < ptrend) (IS_DIGIT(*ptr))? -1:(int)(cb->bracount), /* + and - are relative */ MAX_GROUP_NUMBER, ERR61, &i, &errorcode)) goto FAILED; - if (i < 0) /* NB (?0) is permitted */ - { - errorcode = ERR15; /* Unknown group */ - goto FAILED_BACK; - } + PCRE2_ASSERT(i >= 0); /* NB (?0) is permitted, represented by i=0 */ if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) goto UNCLOSED_PARENTHESIS; @@ -4449,6 +5135,12 @@ while (ptr < ptrend) /* ---- Callout with numerical or string argument ---- */ case CHAR_C: + if ((xoptions & PCRE2_EXTRA_NEVER_CALLOUT) != 0) + { + errorcode = ERR103; + goto FAILED; + } + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; /* If the previous item was a condition starting (?(? 
an assertion, @@ -4536,7 +5228,7 @@ while (ptr < ptrend) parsed_pattern += 3; /* Skip pattern info */ while (ptr < ptrend && IS_DIGIT(*ptr)) { - n = n * 10 + *ptr++ - CHAR_0; + n = n * 10 + (*ptr++ - CHAR_0); if (n > 255) { errorcode = ERR38; @@ -4607,6 +5299,7 @@ while (ptr < ptrend) if (read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i, &errorcode)) { + PCRE2_ASSERT(i >= 0); if (i <= 0) { errorcode = ERR15; @@ -4757,7 +5450,7 @@ while (ptr < ptrend) goto POST_ASSERTION; case CHAR_ASTERISK: - POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (?* */ + POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (*napla: */ *parsed_pattern++ = META_LOOKAHEAD_NA; ptr++; goto POST_ASSERTION; @@ -4922,6 +5615,18 @@ while (ptr < ptrend) cb->named_groups[cb->names_found].isdup = (uint16_t)isdupname; cb->names_found++; break; + + + /* ---- Perl extended character class ---- */ + + /* These are of the form '(?[...])'. We handle these via the same parser + that consumes ordinary '[...]' classes, but with a flag set to activate + the extended behaviour. */ + + case CHAR_LEFT_SQUARE_BRACKET: + class_mode_state = CLASS_MODE_PERL_EXT; + c = *ptr++; + goto FROM_PERL_EXTENDED_CLASS; } /* End of (? 
switch */ break; /* End of ( handling */ @@ -4960,6 +5665,11 @@ while (ptr < ptrend) if ((top_nest->flags & NSF_ATOMICSR) != 0) { *parsed_pattern++ = META_KET; + +#ifdef PCRE2_DEBUG + PCRE2_ASSERT(parsed_pattern_extra > 0); + parsed_pattern_extra--; +#endif } if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; @@ -4968,7 +5678,7 @@ while (ptr < ptrend) if (nest_depth == 0) /* Unmatched closing parenthesis */ { errorcode = ERR22; - goto FAILED_BACK; + goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 } nest_depth--; *parsed_pattern++ = META_KET; @@ -4984,9 +5694,15 @@ if (inverbname && ptr >= ptrend) goto FAILED; } -/* Manage callout for the final item */ PARSED_END: + +PCRE2_ASSERT((parsed_pattern - parsed_pattern_check) + + (parsed_pattern_extra - parsed_pattern_extra_check) <= + max_parsed_pattern(ptr_check, ptr, utf, options)); + +/* Manage callout for the final item */ + parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout, parsed_pattern, cb); @@ -5009,6 +5725,7 @@ Otherwise we have unclosed parentheses. */ if (parsed_pattern >= parsed_pattern_end) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR63; /* Internal error (parsed pattern overflow) */ goto FAILED; } @@ -5114,419 +5831,12 @@ for (;;) code += code[1] + PRIV(OP_lengths)[*code]; break; - default: - return code; - } - } -/* Control never reaches here */ -} - - - -#ifdef SUPPORT_UNICODE -/************************************************* -* Get othercase range * -*************************************************/ - -/* This function is passed the start and end of a class range in UCP mode. For -single characters the range may be just one character long. The function -searches up the characters, looking for ranges of characters in the "other" -case. Each call returns the next one, updating the start address. A character -with multiple other cases is returned on its own with a special return value. 
- -Arguments: - cptr points to starting character value; updated - d end value - ocptr where to put start of othercase range - odptr where to put end of othercase range - restricted TRUE if caseless restriction applies - -Yield: -1 when no more - 0 when a range is returned - >0 the CASESET offset for char with multiple other cases; - for this return, *ocptr contains the original -*/ - -static int -get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, - uint32_t *odptr, BOOL restricted) -{ -uint32_t c, othercase, next; -unsigned int co; - -/* Find the first character that has an other case. If it has multiple other -cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the -multi-case entries that begin with ASCII values. In 32-bit mode, a value -greater than the Unicode maximum ends the range. */ - -for (c = *cptr; c <= d; c++) - { -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c > MAX_UTF_CODE_POINT) return -1; -#endif - if ((co = UCD_CASESET(c)) != 0 && - (!restricted || PRIV(ucd_caseless_sets)[co] > 127)) - { - *ocptr = c++; /* Character that has the set */ - *cptr = c; /* Rest of input range */ - return (int)co; - } - - /* This is not a valid multiple-case character. Check that the single other - case is different to the original. We don't need to check "restricted" here - because the non-ASCII characters with multiple cases that include an ASCII - character don't have a different "othercase". */ - - if ((othercase = UCD_OTHERCASE(c)) != c) break; - } - -if (c > d) return -1; /* Reached end of range */ - -/* Found a character that has a single other case. Search for the end of the -range, which is either the end of the input range, or a character that has zero -or more than one other cases. 
*/ - -*ocptr = othercase; -next = othercase + 1; - -for (++c; c <= d; c++) - { - if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; - next++; - } - -*odptr = next - 1; /* End of othercase range */ -*cptr = c; /* Rest of input range */ -return 0; -} -#endif /* SUPPORT_UNICODE */ - - - -/************************************************* -* Add a character or range to a class (internal) * -*************************************************/ - -/* This function packages up the logic of adding a character or range of -characters to a class. The character values in the arguments will be within the -valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is -called only from within the "add to class" group of functions, some of which -are recursive and mutually recursive. The external entry point is -add_to_class(). - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb compile data - start start of range character - end end of range character - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, - uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, - uint32_t end) -{ -uint32_t c; -uint32_t classbits_end = (end <= 0xff ? end : 0xff); -unsigned int n8 = 0; - -/* If caseless matching is required, scan the range and process alternate -cases. In Unicode, there are 8-bit characters that have alternate cases that -are greater than 255 and vice-versa (though these may be ignored if caseless -restriction is in force). Sometimes we can just extend the original range. 
*/ - -if ((options & PCRE2_CASELESS) != 0) - { -#ifdef SUPPORT_UNICODE - if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0) - { - int rc; - uint32_t oc, od; - - options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ - c = start; - - while ((rc = get_othercase_range(&c, end, &oc, &od, - (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0) - { - /* Handle a single character that has more than one other case. */ - - if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, - options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc); - - /* Do nothing if the other case range is within the original range. */ - - else if (oc >= cb->class_range_start && od <= cb->class_range_end) - continue; - - /* Extend the original range if there is overlap, noting that if oc < c, - we can't have od > end because a subrange is always shorter than the - basic range. Otherwise, use a recursive call to add the additional range. - */ - - else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ - else if (od > end && oc <= end + 1) - { - end = od; /* Extend upwards */ - if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); - } - else n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, - cb, oc, od); - } - } - else -#else - (void)xoptions; /* Avoid compiler warning */ -#endif /* SUPPORT_UNICODE */ - - /* Not UTF mode */ - - for (c = start; c <= classbits_end; c++) - { - SETBIT(classbits, cb->fcc[c]); - n8++; - } - } - -/* Now handle the originally supplied range. Adjust the final value according -to the bit length - this means that the same lists of (e.g.) horizontal spaces -can be used in all cases. */ - -if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR) - end = MAX_NON_UTF_CHAR; - -if (start > cb->class_range_start && end < cb->class_range_end) return n8; - -/* Use the bitmap for characters < 256. Otherwise use extra data.*/ - -for (c = start; c <= classbits_end; c++) - { - /* Regardless of start, c will always be <= 255. 
*/ - SETBIT(classbits, c); - n8++; - } - -#ifdef SUPPORT_WIDE_CHARS -if (start <= 0xff) start = 0xff + 1; - -if (end >= start) - { - PCRE2_UCHAR *uchardata = *uchardptr; - -#ifdef SUPPORT_UNICODE - if ((options & PCRE2_UTF) != 0) - { - if (start < end) - { - *uchardata++ = XCL_RANGE; - uchardata += PRIV(ord2utf)(start, uchardata); - uchardata += PRIV(ord2utf)(end, uchardata); - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - uchardata += PRIV(ord2utf)(start, uchardata); - } - } - else -#endif /* SUPPORT_UNICODE */ - - /* Without UTF support, character values are constrained by the bit length, - and can only be > 256 for 16-bit and 32-bit libraries. */ - -#if PCRE2_CODE_UNIT_WIDTH == 8 - {} -#else - if (start < end) - { - *uchardata++ = XCL_RANGE; - *uchardata++ = start; - *uchardata++ = end; - } - else if (start == end) - { - *uchardata++ = XCL_SINGLE; - *uchardata++ = start; - } -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - *uchardptr = uchardata; /* Updata extra data pointer */ - } -#else /* SUPPORT_WIDE_CHARS */ - (void)uchardptr; /* Avoid compiler warning */ -#endif /* SUPPORT_WIDE_CHARS */ - -return n8; /* Number of 8-bit characters */ -} - - - -#ifdef SUPPORT_UNICODE -/************************************************* -* Add a list of characters to a class (internal) * -*************************************************/ - -/* This function is used for adding a list of case-equivalent characters to a -class when in UTF mode. This function is called only from within -add_to_class_internal(), with which it is mutually recursive. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb contains pointers to tables etc. 
- p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of - case-equivalent characters to avoid including the one we - already know about - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, - uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p, - unsigned int except) -{ -unsigned int n8 = 0; -while (p[0] < NOTACHAR) - { - unsigned int n = 0; - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; - n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - p[0], p[n]); - } - p += n + 1; - } -return n8; -} -#endif - - - -/************************************************* -* External entry point for add range to class * -*************************************************/ - -/* This function sets the overall range so that the internal functions can try -to avoid duplication when handling case-independence. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb compile data - start start of range character - end end of range character - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, - uint32_t xoptions, compile_block *cb, uint32_t start, uint32_t end) -{ -cb->class_range_start = start; -cb->class_range_end = end; -return add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - start, end); -} - - -/************************************************* -* External entry point for add list to class * -*************************************************/ - -/* This function is used for adding a list of horizontal or vertical whitespace -characters to a class. 
The list must be in order so that ranges of characters -can be detected and handled appropriately. This function sets the overall range -so that the internal functions can try to avoid duplication when handling -case-independence. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb contains pointers to tables etc. - p points to row of 32-bit values, terminated by NOTACHAR - except character to omit; this is used when adding lists of - case-equivalent characters to avoid including the one we - already know about - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, - uint32_t xoptions, compile_block *cb, const uint32_t *p, unsigned int except) -{ -unsigned int n8 = 0; -while (p[0] < NOTACHAR) - { - unsigned int n = 0; - if (p[0] != except) - { - while(p[n+1] == p[0] + n + 1) n++; - cb->class_range_start = p[0]; - cb->class_range_end = p[n]; - n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb, - p[0], p[n]); + default: + return code; } - p += n + 1; } -return n8; -} - - -/************************************************* -* Add characters not in a list to a class * -*************************************************/ - -/* This function is used for adding the complement of a list of horizontal or -vertical whitespace to a class. The list must be in order. - -Arguments: - classbits the bit map for characters < 256 - uchardptr points to the pointer for extra data - options the options bits - xoptions the extra options bits - cb contains pointers to tables etc. 
- p points to row of 32-bit values, terminated by NOTACHAR - -Returns: the number of < 256 characters added - the pointer to extra data is updated -*/ - -static unsigned int -add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, - uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p) -{ -BOOL utf = (options & PCRE2_UTF) != 0; -unsigned int n8 = 0; -if (p[0] > 0) - n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, 0, p[0] - 1); -while (p[0] < NOTACHAR) - { - while (p[1] == p[0] + 1) p++; - n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, p[0] + 1, - (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1); - p++; - } -return n8; +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ } @@ -5572,6 +5882,7 @@ have duplicate names. Give an internal error. */ if (i >= cb->names_found) { + PCRE2_DEBUG_UNREACHABLE(); *errorcodeptr = ERR53; cb->erroroffset = name - cb->start_pattern; return FALSE; @@ -5649,12 +5960,13 @@ uint32_t options = *optionsptr; /* May change dynamically */ uint32_t xoptions = *xoptionsptr; /* May change dynamically */ uint32_t firstcu, reqcu; uint32_t zeroreqcu, zerofirstcu; -uint32_t escape; uint32_t *pptr = *pptrptr; uint32_t meta, meta_arg; uint32_t firstcuflags, reqcuflags; uint32_t zeroreqcuflags, zerofirstcuflags; uint32_t req_caseopt, reqvary, tempreqvary; +/* Some opcodes, such as META_SCS_NUMBER or META_SCS_NAME, +depends on the previous value of offset. */ PCRE2_SIZE offset = 0; PCRE2_SIZE length_prevgroup = 0; PCRE2_UCHAR *code = *codeptr; @@ -5668,8 +5980,6 @@ BOOL had_accept = FALSE; BOOL matched_char = FALSE; BOOL previous_matched_char = FALSE; BOOL reset_caseful = FALSE; -const uint8_t *cbits = cb->cbits; -uint8_t classbits[32]; /* We can fish out the UTF setting once and for all into a BOOL, but we must not do this for other options (e.g. 
PCRE2_EXTENDED) that may change dynamically @@ -5682,17 +5992,6 @@ BOOL ucp = (options & PCRE2_UCP) != 0; BOOL utf = FALSE; #endif -/* Helper variables for OP_XCLASS opcode (for characters > 255). We define -class_uchardata always so that it can be passed to add_to_class() always, -though it will not be used in non-UTF 8-bit cases. This avoids having to supply -alternative calls for the different cases. */ - -PCRE2_UCHAR *class_uchardata; -#ifdef SUPPORT_WIDE_CHARS -BOOL xclass; -PCRE2_UCHAR *class_uchardata_base; -#endif - /* Set up the default and non-default settings for greediness */ greedy_default = ((options & PCRE2_UNGREEDY) != 0); @@ -5722,15 +6021,8 @@ req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; for (;; pptr++) { -#ifdef SUPPORT_WIDE_CHARS - BOOL xclass_has_prop; -#endif - BOOL negate_class; - BOOL should_flip_negation; - BOOL match_all_or_no_wide_chars; BOOL possessive_quantifier; BOOL note_group_empty; - int class_has_8bitchar; uint32_t mclength; uint32_t skipunits; uint32_t subreqcu, subfirstcu; @@ -5753,8 +6045,13 @@ for (;; pptr++) if (code > cb->start_workspace + cb->workspace_size - WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ { - *errorcodeptr = (code >= cb->start_workspace + cb->workspace_size)? - ERR52 : ERR86; + if (code >= cb->start_workspace + cb->workspace_size) + { + PCRE2_DEBUG_UNREACHABLE(); + *errorcodeptr = ERR52; /* Over-ran workspace - internal error */ + } + else + *errorcodeptr = ERR86; return 0; } @@ -5860,13 +6157,24 @@ for (;; pptr++) /* ===================================================================*/ /* Empty character classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. Otherwise, an initial ']' is taken as a data character. When empty classes - are allowed, [] must always fail, so generate OP_FAIL, whereas [^] must - match any character, so generate OP_ALLANY. 
*/ + are allowed, [] must generate an empty class - we have no dedicated opcode + to optimise the representation, but it's a rare case (the '(*FAIL)' + construct would be a clearer way for a pattern author to represent a + non-matching branch, but it does have different semantics to '[]' if both + are followed by a quantifier). The empty-negated [^] matches any character, + so is useful: generate OP_ALLANY for this. */ case META_CLASS_EMPTY: case META_CLASS_EMPTY_NOT: matched_char = TRUE; - *code++ = (meta == META_CLASS_EMPTY_NOT)? OP_ALLANY : OP_FAIL; + if (meta == META_CLASS_EMPTY_NOT) *code++ = OP_ALLANY; + else + { + *code++ = OP_CLASS; + memset(code, 0, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; zerofirstcu = firstcu; zerofirstcuflags = firstcuflags; @@ -5889,7 +6197,16 @@ for (;; pptr++) case META_CLASS_NOT: case META_CLASS: matched_char = TRUE; - negate_class = meta == META_CLASS_NOT; + + /* Check for complex extended classes and handle them separately. */ + + if ((*pptr & CLASS_IS_ECLASS) != 0) + { + if (!PRIV(compile_class_nested)(options, xoptions, &pptr, &code, + errorcodeptr, cb, lengthptr)) + return 0; + goto CLASS_END_PROCESSING; + } /* We can optimize the case of a single character in a class by generating OP_CHAR or OP_CHARI if it's positive, or OP_NOT or OP_NOTI if it's @@ -5902,585 +6219,132 @@ for (;; pptr++) if (pptr[1] < META_END && pptr[2] == META_CLASS_END) { -#ifdef SUPPORT_UNICODE - uint32_t d; -#endif uint32_t c = pptr[1]; pptr += 2; /* Move on to class end */ if (meta == META_CLASS) /* A positive one-char class can be */ - { /* handled as a normal literal character. 
*/ - meta = c; /* Set up the character */ - goto NORMAL_CHAR_SET; - } - - /* Handle a negative one-character class */ - - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - - /* For caseless UTF or UCP mode, check whether this character has more - than one other case. If so, generate a special OP_NOTPROP item instead of - OP_NOTI. When restricted by PCRE2_EXTRA_CASELESS_RESTRICT, ignore any - caseless set that starts with an ASCII character. */ - -#ifdef SUPPORT_UNICODE - if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 && - (d = UCD_CASESET(c)) != 0 && - ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || - PRIV(ucd_caseless_sets)[d] > 127)) - { - *code++ = OP_NOTPROP; - *code++ = PT_CLIST; - *code++ = d; - break; /* We are finished with this class */ - } -#endif - /* Char has only one other (usable) case, or UCP not available */ - - *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT; - code += PUTCHAR(c, code); - break; /* We are finished with this class */ - } /* End of 1-char optimization */ - - /* Handle character classes that contain more than just one literal - character. If there are exactly two characters in a positive class, see if - they are case partners. This can be optimized to generate a caseless single - character match (which also sets first/required code units if relevant). - When casing restrictions apply, ignore a caseless set if both characters - are ASCII. 
*/ - - if (meta == META_CLASS && pptr[1] < META_END && pptr[2] < META_END && - pptr[3] == META_CLASS_END) - { - uint32_t c = pptr[1]; - -#ifdef SUPPORT_UNICODE - if (UCD_CASESET(c) == 0 || - ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && - c < 128 && pptr[2] < 128)) -#endif - { - uint32_t d; - -#ifdef SUPPORT_UNICODE - if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else -#endif - { -#if PCRE2_CODE_UNIT_WIDTH != 8 - if (c > 255) d = c; else -#endif - d = TABLE_GET(c, cb->fcc, c); - } - - if (c != d && pptr[2] == d) - { - pptr += 3; /* Move on to class end */ - meta = c; - if ((options & PCRE2_CASELESS) == 0) - { - reset_caseful = TRUE; - options |= PCRE2_CASELESS; - req_caseopt = REQ_CASELESS; - } - goto CLASS_CASELESS_CHAR; - } - } - } - - /* If a non-extended class contains a negative special such as \S, we need - to flip the negation flag at the end, so that support for characters > 255 - works correctly (they are all included in the class). An extended class may - need to insert specific matching or non-matching code for wide characters. - */ - - should_flip_negation = match_all_or_no_wide_chars = FALSE; - - /* Extended class (xclass) will be used when characters > 255 - might match. */ - -#ifdef SUPPORT_WIDE_CHARS - xclass = FALSE; - class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ - class_uchardata_base = class_uchardata; /* Save the start */ -#endif - - /* For optimization purposes, we track some properties of the class: - class_has_8bitchar will be non-zero if the class contains at least one - character with a code point less than 256; xclass_has_prop will be TRUE if - Unicode property checks are present in the class. */ - - class_has_8bitchar = 0; -#ifdef SUPPORT_WIDE_CHARS - xclass_has_prop = FALSE; -#endif - - /* Initialize the 256-bit (32-byte) bit map to all zeros. 
We build the map - in a temporary bit of memory, in case the class contains fewer than two - 8-bit characters because in that case the compiled code doesn't use the bit - map. */ - - memset(classbits, 0, 32 * sizeof(uint8_t)); - - /* Process items until META_CLASS_END is reached. */ - - while ((meta = *(++pptr)) != META_CLASS_END) - { - /* Handle POSIX classes such as [:alpha:] etc. */ - - if (meta == META_POSIX || meta == META_POSIX_NEG) - { - BOOL local_negate = (meta == META_POSIX_NEG); - int posix_class = *(++pptr); - int taboffset, tabopt; - uint8_t pbits[32]; - - should_flip_negation = local_negate; /* Note negative special */ - - /* If matching is caseless, upper and lower are converted to alpha. - This relies on the fact that the class table starts with alpha, - lower, upper as the first 3 entries. */ - - if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2) - posix_class = 0; - - /* When PCRE2_UCP is set, some of the POSIX classes are converted to - different escape sequences that use Unicode properties \p or \P. - Others that are not available via \p or \P have to generate - XCL_PROP/XCL_NOTPROP directly, which is done here. */ - -#ifdef SUPPORT_UNICODE - if ((options & PCRE2_UCP) != 0 && - (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) - { - switch(posix_class) - { - case PC_GRAPH: - case PC_PRINT: - case PC_PUNCT: - *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; - *class_uchardata++ = (PCRE2_UCHAR) - ((posix_class == PC_GRAPH)? PT_PXGRAPH : - (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT); - *class_uchardata++ = 0; - xclass_has_prop = TRUE; - goto CONTINUE_CLASS; - - /* For the other POSIX classes (ex: ascii) we are going to - fall through to the non-UCP case and build a bit map for - characters with code points less than 256. However, if we are in - a negated POSIX class, characters with code points greater than - 255 must either all match or all not match, depending on whether - the whole class is not or is negated. 
For example, for - [[:^ascii:]... they must all match, whereas for [^[:^ascii:]... - they must not. - - In the special case where there are no xclass items, this is - automatically handled by the use of OP_CLASS or OP_NCLASS, but an - explicit range is needed for OP_XCLASS. Setting a flag here - causes the range to be generated later when it is known that - OP_XCLASS is required. In the 8-bit library this is relevant only in - utf mode, since no wide characters can exist otherwise. */ - - default: -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (utf) -#endif - match_all_or_no_wide_chars |= local_negate; - break; - } - } -#endif /* SUPPORT_UNICODE */ - - /* In the non-UCP case, or when UCP makes no difference, we build the - bit map for the POSIX class in a chunk of local store because we may - be adding and subtracting from it, and we don't want to subtract bits - that may be in the main map already. At the end we or the result into - the bit map that is being built. */ - - posix_class *= 3; - - /* Copy in the first table (always present) */ - - memcpy(pbits, cbits + posix_class_maps[posix_class], - 32 * sizeof(uint8_t)); - - /* If there is a second table, add or remove it as required. */ - - taboffset = posix_class_maps[posix_class + 1]; - tabopt = posix_class_maps[posix_class + 2]; - - if (taboffset >= 0) - { - if (tabopt >= 0) - for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; - else - for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; - } - - /* Now see if we need to remove any special characters. An option - value of 1 removes vertical space and 2 removes underscore. */ - - if (tabopt < 0) tabopt = -tabopt; - if (tabopt == 1) pbits[1] &= ~0x3c; - else if (tabopt == 2) pbits[11] &= 0x7f; - - /* Add the POSIX table or its complement into the main table that is - being built and we are done. 
*/ - - if (local_negate) - for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]); - else - for (int i = 0; i < 32; i++) classbits[i] |= pbits[i]; - - /* Every class contains at least one < 256 character. */ - - class_has_8bitchar = 1; - goto CONTINUE_CLASS; /* End of POSIX handling */ - } - - /* Other than POSIX classes, the only items we should encounter are - \d-type escapes and literal characters (possibly as ranges). */ - - if (meta == META_BIGVALUE) - { - meta = *(++pptr); - goto CLASS_LITERAL; - } - - /* Any other non-literal must be an escape */ - - if (meta >= META_END) - { - if (META_CODE(meta) != META_ESCAPE) - { -#ifdef DEBUG_SHOW_PARSED - fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x " - "in character class\n", meta); -#endif - *errorcodeptr = ERR89; /* Internal error - unrecognized. */ - return 0; - } - escape = META_DATA(meta); - - /* Every class contains at least one < 256 character. */ - - class_has_8bitchar++; - - switch(escape) - { - case ESC_d: - for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; - break; - - case ESC_D: - should_flip_negation = TRUE; - for (int i = 0; i < 32; i++) - classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]); - break; - - case ESC_w: - for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; - break; - - case ESC_W: - should_flip_negation = TRUE; - for (int i = 0; i < 32; i++) - classbits[i] |= (uint8_t)(~cbits[i+cbit_word]); - break; - - /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl - 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was - previously set by something earlier in the character class. - Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so - we could just adjust the appropriate bit. From PCRE 8.34 we no - longer treat \s and \S specially. 
*/ - - case ESC_s: - for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; - break; - - case ESC_S: - should_flip_negation = TRUE; - for (int i = 0; i < 32; i++) - classbits[i] |= (uint8_t)(~cbits[i+cbit_space]); - break; - - /* When adding the horizontal or vertical space lists to a class, or - their complements, disable PCRE2_CASELESS, because it justs wastes - time, and in the "not-x" UTF cases can create unwanted duplicates in - the XCLASS list (provoked by characters that have more than one other - case and by both cases being in the same "not-x" sublist). */ - - case ESC_h: - (void)add_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), - NOTACHAR); - break; - - case ESC_H: - (void)add_not_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list)); - break; - - case ESC_v: - (void)add_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), - NOTACHAR); - break; - - case ESC_V: - (void)add_not_list_to_class(classbits, &class_uchardata, - options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list)); - break; - - /* If Unicode is not supported, \P and \p are not allowed and are - faulted at parse time, so will never appear here. */ - -#ifdef SUPPORT_UNICODE - case ESC_p: - case ESC_P: - { - uint32_t ptype = *(++pptr) >> 16; - uint32_t pdata = *pptr & 0xffff; - *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP; - *class_uchardata++ = ptype; - *class_uchardata++ = pdata; - xclass_has_prop = TRUE; - class_has_8bitchar--; /* Undo! */ - } - break; -#endif - } - - goto CONTINUE_CLASS; - } /* End handling \d-type escapes */ - - /* A literal character may be followed by a range meta. At parse time - there are checks for out-of-order characters, for ranges where the two - characters are equal, and for hyphens that cannot indicate a range. At - this point, therefore, no checking is needed. 
*/ - - else - { - uint32_t c, d; - - CLASS_LITERAL: - c = d = meta; - - /* Remember if \r or \n were explicitly used */ - - if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; - - /* Process a character range */ - - if (pptr[1] == META_RANGE_LITERAL || pptr[1] == META_RANGE_ESCAPED) - { -#ifdef EBCDIC - BOOL range_is_literal = (pptr[1] == META_RANGE_LITERAL); -#endif - pptr += 2; - d = *pptr; - if (d == META_BIGVALUE) d = *(++pptr); - - /* Remember an explicit \r or \n, and add the range to the class. */ - - if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; - - /* In an EBCDIC environment, Perl treats alphabetic ranges specially - because there are holes in the encoding, and simply using the range - A-Z (for example) would include the characters in the holes. This - applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */ - -#ifdef EBCDIC - if (range_is_literal && - (cb->ctypes[c] & ctype_letter) != 0 && - (cb->ctypes[d] & ctype_letter) != 0 && - (c <= CHAR_z) == (d <= CHAR_z)) - { - uint32_t uc = (d <= CHAR_z)? 0 : 64; - uint32_t C = c - uc; - uint32_t D = d - uc; - - if (C <= CHAR_i) - { - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc); - C = CHAR_j; - } - - if (C <= D && C <= CHAR_r) - { - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc); - C = CHAR_s; - } - - if (C <= D) - { - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, - cb, C + uc, D + uc); - } - } - else -#endif - /* Not an EBCDIC special range */ - - class_has_8bitchar += add_to_class(classbits, &class_uchardata, - options, xoptions, cb, c, d); - goto CONTINUE_CLASS; /* Go get the next char in the class */ - } /* End of range handling */ - - - /* Handle a single character. 
*/ - - class_has_8bitchar += - add_to_class(classbits, &class_uchardata, options, xoptions, cb, - meta, meta); + { /* handled as a normal literal character. */ + meta = c; /* Set up the character */ + goto NORMAL_CHAR_SET; } - /* Continue to the next item in the class. */ + /* Handle a negative one-character class */ - CONTINUE_CLASS: + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; -#ifdef SUPPORT_WIDE_CHARS - /* If any wide characters or Unicode properties have been encountered, - set xclass = TRUE. Then, in the pre-compile phase, accumulate the length - of the extra data and reset the pointer. This is so that very large - classes that contain a zillion wide characters or Unicode property tests - do not overwrite the workspace (which is on the stack). */ + /* For caseless UTF or UCP mode, check whether this character has more + than one other case. If so, generate a special OP_NOTPROP item instead of + OP_NOTI. When restricted by PCRE2_EXTRA_CASELESS_RESTRICT, ignore any + caseless set that starts with an ASCII character. If the character is + affected by the special Turkish rules, hardcode the not-matching + characters using a caseset. */ - if (class_uchardata > class_uchardata_base) +#ifdef SUPPORT_UNICODE + if ((utf||ucp) && (options & PCRE2_CASELESS) != 0) { - xclass = TRUE; - if (lengthptr != NULL) + uint32_t caseset; + + if ((xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING && + UCD_ANY_I(c)) { - *lengthptr += class_uchardata - class_uchardata_base; - class_uchardata = class_uchardata_base; + caseset = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3); + } + else if ((caseset = UCD_CASESET(c)) != 0 && + (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + PRIV(ucd_caseless_sets)[caseset] < 128) + { + caseset = 0; /* Ignore the caseless set if it's restricted. 
*/ + } + + if (caseset != 0) + { + *code++ = OP_NOTPROP; + *code++ = PT_CLIST; + *code++ = caseset; + break; /* We are finished with this class */ } } #endif + /* Char has only one other (usable) case, or UCP not available */ - continue; /* Needed to avoid error when not supporting wide chars */ - } /* End of main class-processing loop */ + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT; + code += PUTCHAR(c, code); + break; /* We are finished with this class */ + } /* End of 1-char optimization */ - /* If this class is the first thing in the branch, there can be no first - char setting, whatever the repeat count. Any reqcu setting must remain - unchanged after any kind of repeat. */ + /* Handle character classes that contain more than just one literal + character. If there are exactly two characters in a positive class, see if + they are case partners. This can be optimized to generate a caseless single + character match (which also sets first/required code units if relevant). + When casing restrictions apply, ignore a caseless set if both characters + are ASCII. When Turkish casing applies, an 'i' does not match its normal + Unicode "othercase". */ - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; - zerofirstcu = firstcu; - zerofirstcuflags = firstcuflags; - zeroreqcu = reqcu; - zeroreqcuflags = reqcuflags; + if (meta == META_CLASS && pptr[1] < META_END && pptr[2] < META_END && + pptr[3] == META_CLASS_END) + { + uint32_t c = pptr[1]; - /* If there are characters with values > 255, or Unicode property settings - (\p or \P), we have to compile an extended class, with its own opcode, - unless there were no property settings and there was a negated special such - as \S in the class, and PCRE2_UCP is not set, because in that case all - characters > 255 are in or not in the class, so any that were explicitly - given as well can be ignored. 
- - In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were - were present in a class, we either have to match or not match all wide - characters (depending on whether the whole class is or is not negated). - This requirement is indicated by match_all_or_no_wide_chars being true. - We do this by including an explicit range, which works in both cases. - This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there - cannot be any wide characters in 8-bit non-UTF mode. - - When there *are* properties in a positive UTF-8 or any 16-bit or 32_bit - class where \S etc is present without PCRE2_UCP, causing an extended class - to be compiled, we make sure that all characters > 255 are included by - forcing match_all_or_no_wide_chars to be true. - - If, when generating an xclass, there are no characters < 256, we can omit - the bitmap in the actual compiled code. */ - -#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */ - if (xclass && ( #ifdef SUPPORT_UNICODE - (options & PCRE2_UCP) != 0 || -#endif - xclass_has_prop || !should_flip_negation)) - { - if (match_all_or_no_wide_chars || ( -#if PCRE2_CODE_UNIT_WIDTH == 8 - utf && + if ((UCD_CASESET(c) == 0 || + ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + c < 128 && pptr[2] < 128)) && + !((xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING && + UCD_ANY_I(c))) #endif - should_flip_negation && !negate_class && (options & PCRE2_UCP) == 0)) { - *class_uchardata++ = XCL_RANGE; - if (utf) /* Will always be utf in the 8-bit library */ - { - class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); - class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata); - } - else /* Can only happen for the 16-bit & 32-bit libraries */ + uint32_t d; + +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else +#endif { -#if PCRE2_CODE_UNIT_WIDTH == 16 - *class_uchardata++ = 0x100; - *class_uchardata++ 
= 0xffffu; -#elif PCRE2_CODE_UNIT_WIDTH == 32 - *class_uchardata++ = 0x100; - *class_uchardata++ = 0xffffffffu; +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) d = c; else #endif + d = TABLE_GET(c, cb->fcc, c); } - } - *class_uchardata++ = XCL_END; /* Marks the end of extra data */ - *code++ = OP_XCLASS; - code += LINK_SIZE; - *code = negate_class? XCL_NOT:0; - if (xclass_has_prop) *code |= XCL_HASPROP; - - /* If the map is required, move up the extra data to make room for it; - otherwise just move the code pointer to the end of the extra data. */ - if (class_has_8bitchar > 0) - { - *code++ |= XCL_MAP; - (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, - CU2BYTES(class_uchardata - code)); - if (negate_class && !xclass_has_prop) + if (c != d && pptr[2] == d) { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; + pptr += 3; /* Move on to class end */ + meta = c; + if ((options & PCRE2_CASELESS) == 0) + { + reset_caseful = TRUE; + options |= PCRE2_CASELESS; + req_caseopt = REQ_CASELESS; + } + goto CLASS_CASELESS_CHAR; } - memcpy(code, classbits, 32); - code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); } - else code = class_uchardata; + } - /* Now fill in the complete length of the item */ + /* Now emit the OP_CLASS/OP_NCLASS/OP_XCLASS/OP_ALLANY opcode. */ - PUT(previous, 1, (int)(code - previous)); - break; /* End of class handling */ - } -#endif /* SUPPORT_WIDE_CHARS */ + pptr = PRIV(compile_class_not_nested)(options, xoptions, pptr + 1, + &code, meta == META_CLASS_NOT, NULL, + errorcodeptr, cb, lengthptr); + if (pptr == NULL) return 0; + PCRE2_ASSERT(*pptr == META_CLASS_END); - /* If there are no characters > 255, or they are all to be included or - excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the - whole class was negated and whether there were negative specials such as \S - (non-UCP) in the class. 
Then copy the 32-byte map into the code vector, - negating it if necessary. */ + CLASS_END_PROCESSING: - *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; - if (lengthptr == NULL) /* Save time in the pre-compile phase */ - { - if (negate_class) - { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; - } - memcpy(code, classbits, 32); - } - code += 32 / sizeof(PCRE2_UCHAR); + /* If this class is the first thing in the branch, there can be no first + char setting, whatever the repeat count. Any reqcu setting must remain + unchanged after any kind of repeat. */ + + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; break; /* End of class processing */ @@ -6586,6 +6450,15 @@ for (;; pptr++) req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; break; + case META_OFFSET: + GETPLUSOFFSET(offset, pptr); + break; + + case META_SCS: + bravalue = OP_ASSERT_SCS; + cb->assert_depth += 1; + goto GROUP_PROCESS; + /* ===================================================================*/ /* Handle conditional subpatterns. The case of (?(Rdigits) is ambiguous @@ -6597,6 +6470,7 @@ for (;; pptr++) case META_COND_RNUMBER: /* (?(Rdigits) */ case META_COND_NAME: /* (?(name) or (?'name') or ?() */ case META_COND_RNAME: /* (?(R&name) - test for recursion */ + case META_SCS_NAME: /* Name of scan substring */ bravalue = OP_COND; { int count, index; @@ -6605,7 +6479,10 @@ for (;; pptr++) named_group *ng = cb->named_groups; uint32_t length = *(++pptr); - GETPLUSOFFSET(offset, pptr); + if (meta == META_SCS_NAME) + offset += meta_arg; + else + GETPLUSOFFSET(offset, pptr); name = cb->start_pattern + offset; /* In the first pass, the names generated in the pre-pass are available, @@ -6615,34 +6492,21 @@ for (;; pptr++) numerical group. 
*/ for (i = 0; i < cb->names_found; i++, ng++) - { if (length == ng->length && - PRIV(strncmp)(name, ng->name, length) == 0) - { - if (!ng->isdup) - { - code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; - PUT2(code, 2+LINK_SIZE, ng->number); - if (ng->number > cb->top_backref) cb->top_backref = ng->number; - skipunits = 1+IMM2_SIZE; - goto GROUP_PROCESS_NOTE_EMPTY; - } - break; /* Found a duplicated name */ - } - } - - /* If the name was not found we have a bad reference, unless we are - dealing with R, which is treated as a recursion test by number. - */ + PRIV(strncmp)(name, ng->name, length) == 0) break; if (i >= cb->names_found) { + /* If the name was not found we have a bad reference, unless we are + dealing with R, which is treated as a recursion test by + number. */ + groupnumber = 0; if (meta == META_COND_RNUMBER) { for (i = 1; i < length; i++) { - groupnumber = groupnumber * 10 + name[i] - CHAR_0; + groupnumber = groupnumber * 10 + (name[i] - CHAR_0); if (groupnumber > MAX_GROUP_NUMBER) { *errorcodeptr = ERR61; @@ -6669,11 +6533,26 @@ for (;; pptr++) skipunits = 1+IMM2_SIZE; goto GROUP_PROCESS_NOTE_EMPTY; } + else if (!ng->isdup) + { + /* Otherwise found a duplicated name */ + if (ng->number > cb->top_backref) cb->top_backref = ng->number; - /* A duplicated name was found. Note that if an R name is found - (META_COND_RNUMBER), it is a reference test, not a recursion test. */ + if (meta == META_SCS_NAME) + { + code[0] = OP_CREF; + PUT2(code, 1, ng->number); + code += 1+IMM2_SIZE; + break; + } - code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + PUT2(code, 2+LINK_SIZE, ng->number); + skipunits = 1+IMM2_SIZE; + if (meta != META_SCS_NAME) goto GROUP_PROCESS_NOTE_EMPTY; + cb->assert_depth += 1; + goto GROUP_PROCESS; + } /* We have a duplicated name. In the compile pass we have to search the main table in order to get the index and count values. 
*/ @@ -6683,14 +6562,27 @@ for (;; pptr++) if (lengthptr == NULL && !find_dupname_details(name, length, &index, &count, errorcodeptr, cb)) return 0; - /* Add one to the opcode to change CREF/RREF into DNCREF/DNRREF and - insert appropriate data values. */ + if (meta == META_SCS_NAME) + { + code[0] = OP_DNCREF; + PUT2(code, 1, index); + PUT2(code, 1+IMM2_SIZE, count); + code += 1+2*IMM2_SIZE; + break; + } + + /* A duplicated name was found. Note that if an R name is found + (META_COND_RNUMBER), it is a reference test, not a recursion test. */ + + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_DNRREF : OP_DNCREF; - code[1+LINK_SIZE]++; + /* Insert appropriate data values. */ skipunits = 1+2*IMM2_SIZE; PUT2(code, 2+LINK_SIZE, index); PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count); } + + PCRE2_ASSERT(meta != META_SCS_NAME); goto GROUP_PROCESS_NOTE_EMPTY; /* The DEFINE condition is always false. Its internal groups may never @@ -6707,8 +6599,13 @@ for (;; pptr++) /* Conditional test of a group's being set. */ case META_COND_NUMBER: + case META_SCS_NUMBER: bravalue = OP_COND; - GETPLUSOFFSET(offset, pptr); + if (meta == META_SCS_NUMBER) + offset += meta_arg; + else + GETPLUSOFFSET(offset, pptr); + groupnumber = *(++pptr); if (groupnumber > cb->bracount) { @@ -6717,7 +6614,17 @@ for (;; pptr++) return 0; } if (groupnumber > cb->top_backref) cb->top_backref = groupnumber; - offset -= 2; /* Point at initial ( for too many branches error */ + + if (meta == META_SCS_NUMBER) + { + code[0] = OP_CREF; + PUT2(code, 1, groupnumber); + code += 1+IMM2_SIZE; + break; + } + + /* Point at initial ( for too many branches error */ + offset -= 2; code[1+LINK_SIZE] = OP_CREF; skipunits = 1+IMM2_SIZE; PUT2(code, 2+LINK_SIZE, groupnumber); @@ -6855,7 +6762,7 @@ for (;; pptr++) /* If we've just compiled an assertion, pop the assert depth. 
*/ - if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NA) + if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERT_SCS) cb->assert_depth -= 1; /* At the end of compiling, code is still pointing to the start of the @@ -7094,6 +7001,11 @@ for (;; pptr++) *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF; PUT2INC(code, 0, index); PUT2INC(code, 0, count); + if ((options & PCRE2_CASELESS) != 0) + *code++ = (((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? + REFI_FLAG_CASELESS_RESTRICT : 0) | + (((xoptions & PCRE2_EXTRA_TURKISH_CASING) != 0)? + REFI_FLAG_TURKISH_CASING : 0); } break; @@ -7213,7 +7125,6 @@ for (;; pptr++) single-char opcodes. */ reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; - op_type = 0; /* Adjust first and required code units for a zero repeat. */ @@ -7254,6 +7165,7 @@ for (;; pptr++) /* Save start of previous item, in case we have to move it up in order to insert something before it, and remember what it was. */ + PCRE2_ASSERT(previous != NULL); tempcode = previous; op_previous = *previous; @@ -7313,6 +7225,7 @@ for (;; pptr++) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: #endif case OP_CLASS: case OP_NCLASS: @@ -7343,14 +7256,6 @@ for (;; pptr++) } break; - /* If previous is OP_FAIL, it was generated by an empty class [] - (PCRE2_ALLOW_EMPTY_CLASS is set). The other ways in which OP_FAIL can be - generated, that is by (*FAIL) or (?!), disallow a quantifier at parse - time. We can just ignore this repeat. */ - - case OP_FAIL: - goto END_REPEAT; - /* Prior to 10.30, repeated recursions were wrapped in OP_ONCE brackets because pcre2_match() could not handle backtracking into recursively called groups. Now that this backtracking is available, we no longer need @@ -7434,6 +7339,7 @@ for (;; pptr++) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -7758,9 +7664,10 @@ for (;; pptr++) here because it just makes it horribly messy. 
*/ default: - if (op_previous >= OP_EODN) /* Not a character type - internal error */ + if (op_previous >= OP_EODN || op_previous <= OP_WORD_BOUNDARY) { - *errorcodeptr = ERR10; + PCRE2_DEBUG_UNREACHABLE(); + *errorcodeptr = ERR10; /* Not a character type - internal error */ return 0; } else @@ -7780,7 +7687,8 @@ for (;; pptr++) } else { - /* Come here from just above with a character in mcbuffer/mclength. */ + /* Come here from just above with a character in mcbuffer/mclength. + You must also set op_type before the jump. */ OUTPUT_SINGLE_REPEAT: prop_type = prop_value = -1; } @@ -7963,6 +7871,7 @@ for (;; pptr++) #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: tempcode += GET(tempcode, 1); break; #endif @@ -8047,6 +7956,11 @@ for (;; pptr++) if (firstcuflags == REQ_UNSET) zerofirstcuflags = firstcuflags = REQ_NONE; *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; PUT2INC(code, 0, meta_arg); + if ((options & PCRE2_CASELESS) != 0) + *code++ = (((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? + REFI_FLAG_CASELESS_RESTRICT : 0) | + (((xoptions & PCRE2_EXTRA_TURKISH_CASING) != 0)? + REFI_FLAG_TURKISH_CASING : 0); /* Update the map of back references, and keep the highest one. We could do this in parse_regex() for numerical back references, but not @@ -8139,12 +8053,30 @@ for (;; pptr++) uint32_t ptype = *(++pptr) >> 16; uint32_t pdata = *pptr & 0xffff; - /* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit - from the auto-anchoring code. */ + /* In caseless matching, particular characteristics Lu, Ll, and Lt get + converted to the general characteristic L&. That is, upper, lower, and + title case letters are all conflated. */ + + if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC && + (pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt)) + { + ptype = PT_LAMP; + pdata = 0; + } + + /* The special case of \p{Any} is compiled to OP_ALLANY and \P{Any} + is compiled to [] so as to benefit from the auto-anchoring code. 
*/ - if (meta_arg == ESC_p && ptype == PT_ANY) + if (ptype == PT_ANY) { - *code++ = OP_ALLANY; + if (meta_arg == ESC_P) + { + *code++ = OP_CLASS; + memset(code, 0, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + else + *code++ = OP_ALLANY; } else { @@ -8213,9 +8145,7 @@ for (;; pptr++) default: if (meta >= META_END) { -#ifdef DEBUG_SHOW_PARSED - fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x\n", *pptr); -#endif + PCRE2_DEBUG_UNREACHABLE(); *errorcodeptr = ERR89; /* Internal error - unrecognized. */ return 0; } @@ -8231,15 +8161,28 @@ for (;; pptr++) /* For caseless UTF or UCP mode, check whether this character has more than one other case. If so, generate a special OP_PROP item instead of OP_CHARI. When casing restrictions apply, ignore caseless sets that start with an - ASCII character. */ + ASCII character. If the character is affected by the special Turkish rules, + hardcode the matching characters using a caseset. */ #ifdef SUPPORT_UNICODE if ((utf||ucp) && (options & PCRE2_CASELESS) != 0) { - uint32_t caseset = UCD_CASESET(meta); - if (caseset != 0 && - ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || - PRIV(ucd_caseless_sets)[caseset] > 127)) + uint32_t caseset; + + if ((xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING && + UCD_ANY_I(meta)) + { + caseset = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(meta)? 0 : 3); + } + else if ((caseset = UCD_CASESET(meta)) != 0 && + (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + PRIV(ucd_caseless_sets)[caseset] < 128) + { + caseset = 0; /* Ignore the caseless set if it's restricted. */ + } + + if (caseset != 0) { *code++ = OP_PROP; *code++ = PT_CLIST; @@ -8337,7 +8280,8 @@ for (;; pptr++) } /* End of big switch */ } /* End of big loop */ -/* Control never reaches here. 
*/ +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return 0; /* Avoid compiler warnings */ } @@ -8393,8 +8337,6 @@ uint32_t firstcu, reqcu; uint32_t lookbehindlength; uint32_t lookbehindminlength; uint32_t firstcuflags, reqcuflags; -uint32_t branchfirstcu, branchreqcu; -uint32_t branchfirstcuflags, branchreqcuflags; PCRE2_SIZE length; branch_chain bc; @@ -8463,9 +8405,11 @@ code += 1 + LINK_SIZE + skipunits; for (;;) { int branch_return; + uint32_t branchfirstcu = 0, branchreqcu = 0; + uint32_t branchfirstcuflags = REQ_UNSET, branchreqcuflags = REQ_UNSET; /* Insert OP_REVERSE or OP_VREVERSE if this is a lookbehind assertion. There - is only a single mimimum length for the whole assertion. When the mimimum + is only a single minimum length for the whole assertion. When the minimum length is LOOKBEHIND_MAX it means that all branches are of fixed length, though not necessarily the same length. In this case, the original OP_REVERSE can be used. It can also be used if a branch in a variable length lookbehind @@ -8577,10 +8521,10 @@ for (;;) { if (lengthptr == NULL) { - PCRE2_SIZE branch_length = code - last_branch; + uint32_t branch_length = (uint32_t)(code - last_branch); do { - PCRE2_SIZE prev_length = GET(last_branch, 1); + uint32_t prev_length = GET(last_branch, 1); PUT(last_branch, 1, branch_length); branch_length = prev_length; last_branch -= branch_length; @@ -8591,7 +8535,7 @@ for (;;) /* Fill in the ket */ *code = OP_KET; - PUT(code, 1, (int)(code - start_bracket)); + PUT(code, 1, (uint32_t)(code - start_bracket)); code += 1 + LINK_SIZE; /* Set values to pass back */ @@ -8642,7 +8586,9 @@ for (;;) lookbehindlength = META_DATA(*pptr); pptr++; } -/* Control never reaches here */ + +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return 0; /* Avoid compiler warnings */ } @@ -8685,13 +8631,14 @@ Arguments: cb points to the compile data block atomcount atomic group level inassert TRUE if in an assertion + dotstar_anchor TRUE if 
automatic anchoring optimization is enabled Returns: TRUE or FALSE */ static BOOL is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb, - int atomcount, BOOL inassert) + int atomcount, BOOL inassert, BOOL dotstar_anchor) { do { PCRE2_SPTR scode = first_significant_code( @@ -8703,7 +8650,7 @@ do { if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) { - if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } @@ -8714,14 +8661,14 @@ do { { int n = GET2(scode, 1+LINK_SIZE); uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1); - if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE; + if (!is_anchored(scode, new_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } /* Positive forward assertion */ else if (op == OP_ASSERT || op == OP_ASSERT_NA) { - if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor)) return FALSE; } /* Condition. If there is no second branch, it can't be anchored. 
*/ @@ -8729,7 +8676,7 @@ do { else if (op == OP_COND || op == OP_SCOND) { if (scode[GET(scode,1)] != OP_ALT) return FALSE; - if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } @@ -8737,7 +8684,7 @@ do { else if (op == OP_ONCE) { - if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert)) + if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert, dotstar_anchor)) return FALSE; } @@ -8752,8 +8699,7 @@ do { op == OP_TYPEPOSSTAR)) { if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 || - atomcount > 0 || cb->had_pruneorskip || inassert || - (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + atomcount > 0 || cb->had_pruneorskip || inassert || !dotstar_anchor) return FALSE; } @@ -8790,13 +8736,14 @@ Arguments: cb points to the compile data atomcount atomic group level inassert TRUE if in an assertion + dotstar_anchor TRUE if automatic anchoring optimization is enabled Returns: TRUE or FALSE */ static BOOL is_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, - int atomcount, BOOL inassert) + int atomcount, BOOL inassert, BOOL dotstar_anchor) { do { PCRE2_SPTR scode = first_significant_code( @@ -8827,7 +8774,8 @@ do { return FALSE; default: /* Assertion */ - if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor)) + return FALSE; do scode += GET(scode, 1); while (*scode == OP_ALT); scode += 1 + LINK_SIZE; break; @@ -8841,7 +8789,7 @@ do { if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) { - if (!is_startline(scode, bracket_map, cb, atomcount, inassert)) + if (!is_startline(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE; } @@ -8852,14 +8800,15 @@ do { { int n = GET2(scode, 1+LINK_SIZE); unsigned int new_map = bracket_map | ((n < 32)? 
(1u << n) : 1); - if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE; + if (!is_startline(scode, new_map, cb, atomcount, inassert, dotstar_anchor)) + return FALSE; } /* Positive forward assertions */ else if (op == OP_ASSERT || op == OP_ASSERT_NA) { - if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor)) return FALSE; } @@ -8867,7 +8816,7 @@ do { else if (op == OP_ONCE) { - if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert)) + if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert, dotstar_anchor)) return FALSE; } @@ -8881,8 +8830,7 @@ do { else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) { if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 || - atomcount > 0 || cb->had_pruneorskip || inassert || - (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + atomcount > 0 || cb->had_pruneorskip || inassert || !dotstar_anchor) return FALSE; } @@ -8916,8 +8864,8 @@ Arguments: Returns: pointer to the opcode for OP_RECURSE, or NULL if not found */ -static PCRE2_SPTR -find_recurse(PCRE2_SPTR code, BOOL utf) +static PCRE2_UCHAR * +find_recurse(PCRE2_UCHAR *code, BOOL utf) { for (;;) { @@ -8926,12 +8874,13 @@ for (;;) if (c == OP_RECURSE) return code; /* XCLASS is used for classes that cannot be represented just by a bit map. - This includes negated single high-valued characters. CALLOUT_STR is used for - callouts with string arguments. In both cases the length in the table is + This includes negated single high-valued characters. ECLASS is used for + classes that use set operations internally. CALLOUT_STR is used for + callouts with string arguments. In each case the length in the table is zero; the actual length is stored in the compiled code. 
*/ - if (c == OP_XCLASS) code += GET(code, 1); - else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); + if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra @@ -9261,9 +9210,12 @@ for (;; pptr++) if (meta < META_END) continue; /* Literal */ break; - /* This should never occur. */ - case META_END: + + /* The parsed regex is malformed; we have reached the end and did + not find the end of the construct which we are skipping over. */ + + PCRE2_DEBUG_UNREACHABLE(); return NULL; /* The data for these items is variable in length. */ @@ -9272,19 +9224,9 @@ for (;; pptr++) if (META_DATA(*pptr) >= 10) pptr += SIZEOFFSET; break; - case META_ESCAPE: /* A few escapes are followed by data items. */ - switch (META_DATA(*pptr)) - { - case ESC_P: - case ESC_p: - pptr += 1; - break; - - case ESC_g: - case ESC_k: - pptr += 1 + SIZEOFFSET; - break; - } + case META_ESCAPE: + if (*pptr - META_ESCAPE == ESC_P || *pptr - META_ESCAPE == ESC_p) + pptr += 1; /* Skip prop data */ break; case META_MARK: /* Add the length of the name. 
*/ @@ -9310,6 +9252,7 @@ for (;; pptr++) case META_COND_RNAME: case META_COND_RNUMBER: case META_COND_VERSION: + case META_SCS: case META_LOOKAHEAD: case META_LOOKAHEADNOT: case META_LOOKAHEAD_NA: @@ -9337,8 +9280,8 @@ for (;; pptr++) if (meta >= sizeof(meta_extra_lengths)) return NULL; pptr += meta_extra_lengths[meta]; } -/* Control never reaches here */ -return pptr; + +PCRE2_UNREACHABLE(); /* Control never reaches here */ } @@ -9469,10 +9412,10 @@ for (;; pptr++) parsed_recurse_check *r; uint32_t *gptr, *gptrend; uint32_t escape; + uint32_t min, max; uint32_t group = 0; uint32_t itemlength = 0; uint32_t itemminlength = 0; - uint32_t min, max; if (*pptr < META_END) { @@ -9571,6 +9514,7 @@ for (;; pptr++) case META_LOOKAHEAD: case META_LOOKAHEADNOT: case META_LOOKAHEAD_NA: + case META_SCS: *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb, lcptr); if (*errcodeptr != 0) return -1; @@ -9602,7 +9546,9 @@ for (;; pptr++) break; /* A nested lookbehind does not contribute any length to this lookbehind, - but must itself be checked and have its lengths set. */ + but must itself be checked and have its lengths set. Note that + set_lookbehind_lengths() updates pptr, leaving it pointing to the final ket + of the group, so no need to update it here. */ case META_LOOKBEHIND: case META_LOOKBEHINDNOT: @@ -9838,7 +9784,8 @@ EXIT: return branchlength; PARSED_SKIP_FAILED: -*errcodeptr = ERR90; +PCRE2_DEBUG_UNREACHABLE(); +*errcodeptr = ERR90; /* Unhandled META code - internal error */ return -1; } @@ -9922,7 +9869,7 @@ possibly different) length. */ if (variable) { gbptr[1] = minlength; - if ((uint32_t)maxlength > cb->max_varlookbehind) + if ((PCRE2_SIZE)maxlength > cb->max_varlookbehind) { *errcodeptr = ERR100; cb->erroroffset = offset; @@ -9931,8 +9878,6 @@ if (variable) } else gbptr[1] = LOOKBEHIND_MAX; - -gbptr[1] = variable? 
minlength : LOOKBEHIND_MAX; return TRUE; } @@ -9978,11 +9923,18 @@ for (; *pptr != META_END; pptr++) switch (META_CODE(*pptr)) { default: + + /* The following erroroffset is a bogus but safe value. This branch should + be avoided by providing a proper implementation for all supported cases + below. */ + + PCRE2_DEBUG_UNREACHABLE(); + cb->erroroffset = 0; return ERR70; /* Unrecognized meta code */ case META_ESCAPE: if (*pptr - META_ESCAPE == ESC_P || *pptr - META_ESCAPE == ESC_p) - pptr += 1; + pptr += 1; /* Skip prop data */ break; case META_KET: @@ -9996,6 +9948,7 @@ for (; *pptr != META_END; pptr++) case META_ATOMIC: case META_CAPTURE: case META_COND_ASSERT: + case META_SCS: case META_LOOKAHEAD: case META_LOOKAHEADNOT: case META_LOOKAHEAD_NA: @@ -10033,6 +9986,7 @@ for (; *pptr != META_END; pptr++) case META_THEN: break; + case META_OFFSET: case META_RECURSE: pptr += SIZEOFFSET; break; @@ -10067,6 +10021,8 @@ for (; *pptr != META_END; pptr++) case META_BIGVALUE: case META_POSIX: case META_POSIX_NEG: + case META_SCS_NAME: + case META_SCS_NUMBER: pptr += 1; break; @@ -10089,6 +10045,9 @@ for (; *pptr != META_END; pptr++) pptr += 1 + pptr[1]; break; + /* Note that set_lookbehind_lengths() updates pptr, leaving it pointing to + the final ket of the group, so no need to update it here. 
*/ + case META_LOOKBEHIND: case META_LOOKBEHINDNOT: case META_LOOKBEHIND_NA: @@ -10135,19 +10094,19 @@ compile_block cb; /* "Static" compile-time data */ const uint8_t *tables; /* Char tables base pointer */ PCRE2_UCHAR *code; /* Current pointer in compiled code */ -PCRE2_SPTR codestart; /* Start of compiled code */ +PCRE2_UCHAR * codestart; /* Start of compiled code */ PCRE2_SPTR ptr; /* Current pointer in pattern */ uint32_t *pptr; /* Current pointer in parsed pattern */ PCRE2_SIZE length = 1; /* Allow for final END opcode */ PCRE2_SIZE usedlength; /* Actual length used */ PCRE2_SIZE re_blocksize; /* Size of memory block */ -PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */ PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */ uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ uint32_t firstcu, reqcu; /* Value of first/req code unit */ uint32_t setflags = 0; /* NL and BSR set flags */ +uint32_t xoptions; /* Flags from context, modified */ uint32_t skipatstart; /* When checking (*UTF) etc */ uint32_t limit_heap = UINT32_MAX; @@ -10161,6 +10120,10 @@ int regexrc; /* Return from compile */ uint32_t i; /* Local loop counter */ +/* Enable all optimizations by default. */ +uint32_t optim_flags = ccontext != NULL ? ccontext->optimization_flags : + PCRE2_OPTIMIZATION_ALL; + /* Comments at the head of this file explain about these variables. */ uint32_t stack_groupinfo[GROUPINFO_DEFAULT_SIZE]; @@ -10224,6 +10187,7 @@ PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */ if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED))) patlen = PRIV(strlen)(pattern); +(void)zero_terminated; /* Silence compiler; only used if Valgrind enabled */ if (patlen > ccontext->max_pattern_length) { @@ -10231,6 +10195,18 @@ if (patlen > ccontext->max_pattern_length) return NULL; } +/* Optimization flags in 'options' can override those in the compile context. 
+This is because some options to disable optimizations were added before the +optimization flags word existed, and we need to continue supporting them +for backwards compatibility. */ + +if ((options & PCRE2_NO_AUTO_POSSESS) != 0) + optim_flags &= ~PCRE2_OPTIM_AUTO_POSSESS; +if ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + optim_flags &= ~PCRE2_OPTIM_DOTSTAR_ANCHOR; +if ((options & PCRE2_NO_START_OPTIMIZE) != 0) + optim_flags &= ~PCRE2_OPTIM_START_OPTIMIZE; + /* From here on, all returns from this function should end up going via the EXIT label. */ @@ -10269,6 +10245,11 @@ cb.start_code = cworkspace; cb.start_pattern = pattern; cb.start_workspace = cworkspace; cb.workspace_size = COMPILE_WORK_SIZE; +#ifdef SUPPORT_WIDE_CHARS +cb.cranges = NULL; +cb.next_cranges = NULL; +cb.char_lists_size = 0; +#endif /* Maximum back reference and backref bitmap. The bitmap records up to 31 back references to help in deciding whether (.*) can be treated as anchored or not. @@ -10302,6 +10283,7 @@ non-zero-terminated patterns. 
*/ if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1)); #endif +xoptions = ccontext->extra_options; ptr = pattern; skipatstart = 0; @@ -10313,13 +10295,13 @@ if ((options & PCRE2_LITERAL) == 0) { for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) { - uint32_t c, pp; const pso *p = pso_list + i; if (patlen - skipatstart - 2 >= p->length && - PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name), - p->length) == 0) + PRIV(strncmp_c8)(ptr + skipatstart + 2, p->name, p->length) == 0) { + uint32_t c, pp; + skipatstart += p->length + 2; switch(p->type) { @@ -10327,6 +10309,10 @@ if ((options & PCRE2_LITERAL) == 0) cb.external_options |= p->value; break; + case PSO_XOPT: + xoptions |= p->value; + break; + case PSO_FLG: setflags |= p->value; break; @@ -10346,18 +10332,12 @@ if ((options & PCRE2_LITERAL) == 0) case PSO_LIMH: c = 0; pp = skipatstart; - if (!IS_DIGIT(ptr[pp])) - { - errorcode = ERR60; - ptr += pp; - goto HAD_EARLY_ERROR; - } - while (IS_DIGIT(ptr[pp])) + while (pp < patlen && IS_DIGIT(ptr[pp])) { if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ c = c*10 + (ptr[pp++] - CHAR_0); } - if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) + if (pp >= patlen || pp == skipatstart || ptr[pp] != CHAR_RIGHT_PARENTHESIS) { errorcode = ERR60; ptr += pp; @@ -10366,14 +10346,45 @@ if ((options & PCRE2_LITERAL) == 0) if (p->type == PSO_LIMH) limit_heap = c; else if (p->type == PSO_LIMM) limit_match = c; else limit_depth = c; - skipatstart += pp - skipatstart; + skipatstart = ++pp; + break; + + case PSO_OPTMZ: + optim_flags &= ~(p->value); + + /* For backward compatibility the three original VERBs to disable + optimizations need to also update the corresponding bit in the + external options. 
*/ + + switch(p->value) + { + case PCRE2_OPTIM_AUTO_POSSESS: + cb.external_options |= PCRE2_NO_AUTO_POSSESS; + break; + + case PCRE2_OPTIM_DOTSTAR_ANCHOR: + cb.external_options |= PCRE2_NO_DOTSTAR_ANCHOR; + break; + + case PCRE2_OPTIM_START_OPTIMIZE: + cb.external_options |= PCRE2_NO_START_OPTIMIZE; + break; + } + break; + + default: + /* All values in the enum need an explicit entry for this switch + but until a better way to prevent coding mistakes is invented keep + a catch all that triggers a debug build assert as a failsafe */ + PCRE2_DEBUG_UNREACHABLE(); } break; /* Out of the table scan loop */ } } if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */ } + PCRE2_ASSERT(skipatstart <= patlen); } /* End of pattern-start options; advance to start of real regex. */ @@ -10425,6 +10436,31 @@ if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0) goto HAD_EARLY_ERROR; } +/* PCRE2_EXTRA_TURKISH_CASING checks */ + +if ((xoptions & PCRE2_EXTRA_TURKISH_CASING) != 0) + { + if (!utf && !ucp) + { + errorcode = ERR104; + goto HAD_EARLY_ERROR; + } + +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (!utf) + { + errorcode = ERR105; + goto HAD_EARLY_ERROR; + } +#endif + + if ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0) + { + errorcode = ERR106; + goto HAD_EARLY_ERROR; + } + } + /* Process the BSR setting. */ if (bsr == 0) bsr = ccontext->bsr_convention; @@ -10465,6 +10501,7 @@ switch(newline) break; default: + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR56; goto HAD_EARLY_ERROR; } @@ -10473,42 +10510,31 @@ switch(newline) their numerical equivalents, so that this information is always available for the remaining processing. (2) At the same time, parse the pattern and put a processed version into the parsed_pattern vector. This has escapes interpreted -and comments removed (amongst other things). +and comments removed (amongst other things). 
*/ -In all but one case, when PCRE2_AUTO_CALLOUT is not set, the number of unsigned -32-bit ints in the parsed pattern is bounded by the length of the pattern plus -one (for the terminator) plus four if PCRE2_EXTRA_WORD or PCRE2_EXTRA_LINE is -set. The exceptional case is when running in 32-bit, non-UTF mode, when literal -characters greater than META_END (0x80000000) have to be coded as two units. In -this case, therefore, we scan the pattern to check for such values. */ - -#if PCRE2_CODE_UNIT_WIDTH == 32 -if (!utf) - { - PCRE2_SPTR p; - for (p = ptr; p < cb.end_pattern; p++) if (*p >= META_END) big32count++; - } -#endif +/* Ensure that the parsed pattern buffer is big enough. For many smaller +patterns the vector on the stack (which was set up above) can be used. */ -/* Ensure that the parsed pattern buffer is big enough. When PCRE2_AUTO_CALLOUT -is set we have to assume a numerical callout (4 elements) for each character -plus one at the end. This is overkill, but memory is plentiful these days. For -many smaller patterns the vector on the stack (which was set up above) can be -used. */ +parsed_size_needed = max_parsed_pattern(ptr, cb.end_pattern, utf, options); -parsed_size_needed = patlen - skipatstart + big32count; +/* Allow for 2x uint32_t at the start and 2 at the end, for +PCRE2_EXTRA_MATCH_WORD or PCRE2_EXTRA_MATCH_LINE (which are exclusive). */ if ((ccontext->extra_options & (PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_MATCH_LINE)) != 0) parsed_size_needed += 4; +/* When PCRE2_AUTO_CALLOUT is set we allow for one callout at the end. 
*/ + if ((options & PCRE2_AUTO_CALLOUT) != 0) - parsed_size_needed = (parsed_size_needed + 1) * 5; + parsed_size_needed += 4; + +parsed_size_needed += 1; /* For the final META_END */ -if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE) +if (parsed_size_needed > PARSED_PATTERN_DEFAULT_SIZE) { uint32_t *heap_parsed_pattern = ccontext->memctl.malloc( - (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data); + parsed_size_needed * sizeof(uint32_t), ccontext->memctl.memory_data); if (heap_parsed_pattern == NULL) { *errorptr = ERR21; @@ -10516,11 +10542,11 @@ if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE) } cb.parsed_pattern = heap_parsed_pattern; } -cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1; +cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed; /* Do the parsing scan. */ -errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb); +errorcode = parse_regex(ptr, cb.external_options, xoptions, &has_lookbehind, &cb); if (errorcode != 0) goto HAD_CB_ERROR; /* If there are any lookbehinds, scan the parsed pattern to figure out their @@ -10589,7 +10615,7 @@ pptr = cb.parsed_pattern; code = cworkspace; *code = OP_BRA; -(void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr, +(void)compile_regex(cb.external_options, xoptions, &code, &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, NULL, &cb, &length); @@ -10597,7 +10623,13 @@ if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ /* This should be caught in compile_regex(), but just in case... */ +#if defined SUPPORT_WIDE_CHARS +PCRE2_ASSERT((cb.char_lists_size & 0x3) == 0); +if (length > MAX_PATTERN_SIZE || + MAX_PATTERN_SIZE - length < (cb.char_lists_size / sizeof(PCRE2_UCHAR))) +#else if (length > MAX_PATTERN_SIZE) +#endif { errorcode = ERR20; goto HAD_CB_ERROR; @@ -10608,9 +10640,22 @@ block for storing the compiled pattern and names table. 
Integer overflow should no longer be possible because nowadays we limit the maximum value of cb.names_found and cb.name_entry_size. */ -re_blocksize = sizeof(pcre2_real_code) + - CU2BYTES(length + - (PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size); +re_blocksize = + CU2BYTES((PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size); + +#if defined SUPPORT_WIDE_CHARS +if (cb.char_lists_size != 0) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + /* Align to 32 bit first. This ensures the + allocated area will also be 32 bit aligned. */ + re_blocksize = (PCRE2_SIZE)CLIST_ALIGN_TO(re_blocksize, sizeof(uint32_t)); +#endif + re_blocksize += cb.char_lists_size; + } +#endif + +re_blocksize += CU2BYTES(length); if (re_blocksize > ccontext->max_pattern_compiled_length) { @@ -10618,6 +10663,7 @@ if (re_blocksize > ccontext->max_pattern_compiled_length) goto HAD_CB_ERROR; } +re_blocksize += sizeof(pcre2_real_code); re = (pcre2_real_code *) ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data); if (re == NULL) @@ -10638,10 +10684,11 @@ re->tables = tables; re->executable_jit = NULL; memset(re->start_bitmap, 0, 32 * sizeof(uint8_t)); re->blocksize = re_blocksize; +re->code_start = re_blocksize - CU2BYTES(length); re->magic_number = MAGIC_NUMBER; re->compile_options = options; re->overall_options = cb.external_options; -re->extra_options = ccontext->extra_options; +re->extra_options = xoptions; re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; re->limit_heap = limit_heap; re->limit_match = limit_match; @@ -10656,12 +10703,12 @@ re->top_bracket = 0; re->top_backref = 0; re->name_entry_size = cb.name_entry_size; re->name_count = cb.names_found; +re->optimization_flags = optim_flags; /* The basic block is immediately followed by the name table, and the compiled code follows after that. 
*/ -codestart = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) + - re->name_entry_size * re->name_count; +codestart = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start); /* Update the compile data block for the actual compile. The starting points of the name/number translation table and of the code are passed around in the @@ -10676,6 +10723,10 @@ cb.start_code = codestart; cb.req_varyopt = 0; cb.had_accept = FALSE; cb.had_pruneorskip = FALSE; +#ifdef SUPPORT_WIDE_CHARS +cb.char_lists_size = 0; +#endif + /* If any named groups were found, create the name/number table from the list created in the pre-pass. */ @@ -10694,7 +10745,7 @@ of the function here. */ pptr = cb.parsed_pattern; code = (PCRE2_UCHAR *)codestart; *code = OP_BRA; -regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, +regexrc = compile_regex(re->overall_options, re->extra_options, &code, &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, NULL, &cb, NULL); if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; @@ -10716,7 +10767,12 @@ memory as unaddressable, so that any out-of-bound reads can be detected. 
*/ *code++ = OP_END; usedlength = code - codestart; -if (usedlength > length) errorcode = ERR23; else +if (usedlength > length) + { + PCRE2_DEBUG_UNREACHABLE(); + errorcode = ERR23; /* Overflow of code block - internal error */ + } +else { re->blocksize -= CU2BYTES(length - usedlength); #ifdef SUPPORT_VALGRIND @@ -10738,9 +10794,9 @@ if (errorcode == 0 && cb.had_recurse) int start = RSCAN_CACHE_SIZE; recurse_cache rc[RSCAN_CACHE_SIZE]; - for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf); + for (rcode = find_recurse(codestart, utf); rcode != NULL; - rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf)) + rcode = find_recurse(rcode + 1 + LINK_SIZE, utf)) { int p, groupnumber; @@ -10769,6 +10825,7 @@ if (errorcode == 0 && cb.had_recurse) rgroup = PRIV(find_bracket)(search_from, utf, groupnumber); if (rgroup == NULL) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR53; break; } @@ -10779,7 +10836,7 @@ if (errorcode == 0 && cb.had_recurse) } } - PUT(rcode, 1, rgroup - codestart); + PUT(rcode, 1, (uint32_t)(rgroup - codestart)); } } @@ -10798,10 +10855,14 @@ used in this code because at least one compiler gives a warning about loss of "const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the function call. */ -if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0) +if (errorcode == 0 && (optim_flags & PCRE2_OPTIM_AUTO_POSSESS) != 0) { PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart; - if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80; + if (PRIV(auto_possessify)(temp, &cb) != 0) + { + PCRE2_DEBUG_UNREACHABLE(); + errorcode = ERR80; + } } /* Failed to compile, or error while post-processing. */ @@ -10814,18 +10875,21 @@ or anything else, such as starting with non-atomic .* when DOTALL is set and there are no occurrences of *PRUNE or *SKIP (though there is an option to disable this case). 
*/ -if ((re->overall_options & PCRE2_ANCHORED) == 0 && - is_anchored(codestart, 0, &cb, 0, FALSE)) - re->overall_options |= PCRE2_ANCHORED; +if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + BOOL dotstar_anchor = ((optim_flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0); + if (is_anchored(codestart, 0, &cb, 0, FALSE, dotstar_anchor)) + re->overall_options |= PCRE2_ANCHORED; + } /* Set up the first code unit or startline flag, the required code unit, and -then study the pattern. This code need not be obeyed if PCRE2_NO_START_OPTIMIZE -is set, as the data it would create will not be used. Note that a first code +then study the pattern. This code need not be obeyed if PCRE2_OPTIM_START_OPTIMIZE +is disabled, as the data it would create will not be used. Note that a first code unit (but not the startline flag) is useful for anchored patterns because it can still give a quick "no match" and also avoid searching for a last code unit. */ -if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) +if ((optim_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { int minminlength = 0; /* For minimal minlength from first/required CU */ @@ -10833,8 +10897,19 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) (these are not saved during the compile because they can cause conflicts with actual literals that follow). */ - if (firstcuflags >= REQ_NONE) - firstcu = find_firstassertedcu(codestart, &firstcuflags, 0); + if (firstcuflags >= REQ_NONE) { + uint32_t assertedcuflags = 0; + uint32_t assertedcu = find_firstassertedcu(codestart, &assertedcuflags, 0); + /* It would be wrong to use the asserted first code unit as `firstcu` for + * regexes which are able to match a 1-character string (e.g. /(?=a)b?a/) + * For that example, if we set both firstcu and reqcu to 'a', it would mean + * the subject string needs to be at least 2 characters long, which is wrong. + * With more analysis, we would be able to set firstcu in more cases. 
*/ + if (assertedcuflags < REQ_NONE && assertedcu != reqcu) { + firstcu = assertedcu; + firstcuflags = assertedcuflags; + } + } /* Save the data for a first code unit. The existence of one means the minimum length must be at least 1. */ @@ -10855,8 +10930,8 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) } /* The first code unit is > 128 in UTF or UCP mode, or > 255 otherwise. - In 8-bit UTF mode, codepoints in the range 128-255 are introductory code - points and cannot have another case, but if UCP is set they may do. */ + In 8-bit UTF mode, code units in the range 128-255 are introductory code + units and cannot have another case, but if UCP is set they may do. */ #ifdef SUPPORT_UNICODE #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -10877,9 +10952,12 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) non-DOTALL matches when *PRUNE and SKIP are not present. (There is an option that disables this case.) */ - else if ((re->overall_options & PCRE2_ANCHORED) == 0 && - is_startline(codestart, 0, &cb, 0, FALSE)) - re->flags |= PCRE2_STARTLINE; + else if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + BOOL dotstar_anchor = ((optim_flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0); + if (is_startline(codestart, 0, &cb, 0, FALSE, dotstar_anchor)) + re->flags |= PCRE2_STARTLINE; + } /* Handle the "required code unit", if one is set. In the UTF case we can increment the minimum minimum length only if we are sure this really is a @@ -10939,6 +11017,7 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) if (PRIV(study)(re) != 0) { + PCRE2_DEBUG_UNREACHABLE(); errorcode = ERR31; goto HAD_CB_ERROR; } @@ -10961,6 +11040,10 @@ version of the pattern, free it before returning. Also free the list of named groups if a larger one had to be obtained, and likewise the group information vector. 
*/ +#ifdef SUPPORT_UNICODE +PCRE2_ASSERT(cb.cranges == NULL); +#endif + EXIT: #ifdef SUPPORT_VALGRIND if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1)); @@ -10971,6 +11054,7 @@ if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE) ccontext->memctl.free((void *)cb.named_groups, ccontext->memctl.memory_data); if (cb.groupinfo != stack_groupinfo) ccontext->memctl.free((void *)cb.groupinfo, ccontext->memctl.memory_data); + return re; /* Will be NULL after an error */ /* Errors discovered in parse_regex() set the offset value in the compile @@ -10983,12 +11067,28 @@ HAD_CB_ERROR: ptr = pattern + cb.erroroffset; HAD_EARLY_ERROR: +PCRE2_ASSERT(ptr >= pattern); /* Ensure we don't return invalid erroroffset */ +PCRE2_ASSERT(ptr <= (pattern + patlen)); *erroroffset = ptr - pattern; HAD_ERROR: *errorptr = errorcode; pcre2_code_free(re); re = NULL; + +#ifdef SUPPORT_WIDE_CHARS +if (cb.cranges != NULL) + { + class_ranges* cranges = cb.cranges; + do + { + class_ranges* next_cranges = cranges->next; + cb.cx->memctl.free(cranges, cb.cx->memctl.memory_data); + cranges = next_cranges; + } + while (cranges != NULL); + } +#endif goto EXIT; } diff --git a/src/pcre2_compile.h b/src/pcre2_compile.h new file mode 100644 index 0000000..c8bf610 --- /dev/null +++ b/src/pcre2_compile.h @@ -0,0 +1,280 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD +#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD + +#include "pcre2_internal.h" + +/* Compile time error code numbers. 
They are given names so that they can more +easily be tracked. When a new number is added, the tables called eint1 and +eint2 in pcre2posix.c may need to be updated, and a new error text must be +added to compile_error_texts in pcre2_error.c. Also, the error codes in +pcre2.h.in must be updated - their values are exactly 100 greater than these +values. */ + +enum { ERR0 = COMPILE_ERROR_BASE, + ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, + ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, + ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30, + ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, + ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, + ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, + ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, + ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, + ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, + ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100, + ERR101,ERR102,ERR103,ERR104,ERR105,ERR106,ERR107,ERR108,ERR109,ERR110, + ERR111,ERR112,ERR113,ERR114,ERR115,ERR116 }; + +/* Code values for parsed patterns, which are stored in a vector of 32-bit +unsigned ints. Values less than META_END are literal data values. The coding +for identifying the item is in the top 16-bits, leaving 16 bits for the +additional data that some of them need. The META_CODE, META_DATA, and META_DIFF +macros are used to manipulate parsed pattern elements. + +NOTE: When these definitions are changed, the table of extra lengths for each +code (meta_extra_lengths) must be updated to remain in step. 
*/ + +#define META_END 0x80000000u /* End of pattern */ + +#define META_ALT 0x80010000u /* alternation */ +#define META_ATOMIC 0x80020000u /* atomic group */ +#define META_BACKREF 0x80030000u /* Back ref */ +#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */ +#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */ +#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */ +#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */ +#define META_CAPTURE 0x80080000u /* Capturing parenthesis */ +#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */ +#define META_CLASS 0x800a0000u /* start non-empty class */ +#define META_CLASS_EMPTY 0x800b0000u /* empty class */ +#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */ +#define META_CLASS_END 0x800d0000u /* end of non-empty class */ +#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */ +#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */ +#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */ +#define META_COND_NAME 0x80110000u /* (?()... */ +#define META_COND_NUMBER 0x80120000u /* (?(digits)... */ +#define META_COND_RNAME 0x80130000u /* (?(R&name)... */ +#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */ +#define META_COND_VERSION 0x80150000u /* (?(VERSIONx.y)... */ +#define META_OFFSET 0x80160000u /* Setting offset for various + META codes (e.g. META_SCS_NAME) */ +#define META_SCS 0x80170000u /* (*scan_substring:... */ +#define META_SCS_NAME 0x80180000u /* Next of scan_substring */ +#define META_SCS_NUMBER 0x80190000u /* Next digits of scan_substring */ +#define META_DOLLAR 0x801a0000u /* $ metacharacter */ +#define META_DOT 0x801b0000u /* . 
metacharacter */ +#define META_ESCAPE 0x801c0000u /* \d and friends */ +#define META_KET 0x801d0000u /* closing parenthesis */ +#define META_NOCAPTURE 0x801e0000u /* no capture parens */ +#define META_OPTIONS 0x801f0000u /* (?i) and friends */ +#define META_POSIX 0x80200000u /* POSIX class item */ +#define META_POSIX_NEG 0x80210000u /* negative POSIX class item */ +#define META_RANGE_ESCAPED 0x80220000u /* range with at least one escape */ +#define META_RANGE_LITERAL 0x80230000u /* range defined literally */ +#define META_RECURSE 0x80240000u /* Recursion */ +#define META_RECURSE_BYNAME 0x80250000u /* (?&name) */ +#define META_SCRIPT_RUN 0x80260000u /* (*script_run:...) */ + +/* These must be kept together to make it easy to check that an assertion +is present where expected in a conditional group. */ + +#define META_LOOKAHEAD 0x80270000u /* (?= */ +#define META_LOOKAHEADNOT 0x80280000u /* (?! */ +#define META_LOOKBEHIND 0x80290000u /* (?<= */ +#define META_LOOKBEHINDNOT 0x802a0000u /* (?>16) + +/* Extended class management flags. */ + +#define CLASS_IS_ECLASS 0x1 + +/* Macro for the highest character value. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define MAX_UCHAR_VALUE 0xffu +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define MAX_UCHAR_VALUE 0xffffu +#else +#define MAX_UCHAR_VALUE 0xffffffffu +#endif + +#define GET_MAX_CHAR_VALUE(utf) \ + ((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE) + +/* Macro for setting individual bits in class bitmaps. */ + +#define SETBIT(a,b) a[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7)) + +/* Macro for 8 bit specific checks. */ +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define SELECT_VALUE8(value8, value) (value8) +#else +#define SELECT_VALUE8(value8, value) (value) +#endif + +/* Macro for aligning data. */ +#define CLIST_ALIGN_TO(base, align) \ + ((base + ((size_t)(align) - 1)) & ~((size_t)(align) - 1)) + +/* Structure for holding information about an OP_ECLASS internal operand. 
+An "operand" here could be just a single OP_[X]CLASS, or it could be some
+complex expression; but it's some sequence of ECL_* codes which pushes one
+value to the stack. */
+typedef struct {
+  /* The position of the operand - or NULL if (lengthptr != NULL). */
+  PCRE2_UCHAR *code_start;
+  PCRE2_SIZE length;  /* NOTE(review): presumably the operand's length in code units - confirm. */
+  /* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
+  otherwise zero if the operand is not atomic. */
+  uint8_t op_single_type;
+  /* Regardless of whether it's a single code or not, we fully constant-fold
+  the bitmap for code points < 256. */
+  class_bits_storage bits;
+} eclass_op_info;
+
+/* Macros for the definitions below, to prevent name collisions. */
+
+#define _pcre2_posix_class_maps          PCRE2_SUFFIX(_pcre2_posix_class_maps)
+#define _pcre2_update_classbits          PCRE2_SUFFIX(_pcre2_update_classbits_)
+#define _pcre2_compile_class_nested      PCRE2_SUFFIX(_pcre2_compile_class_nested_)
+#define _pcre2_compile_class_not_nested  PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
+
+
+/* Indices of the POSIX classes in posix_names, posix_name_lengths,
+posix_class_maps, and posix_substitutes. They must be kept in sync. */
+
+#define PC_DIGIT   7
+#define PC_GRAPH   8
+#define PC_PRINT   9
+#define PC_PUNCT  10
+#define PC_XDIGIT 13
+
+extern const int PRIV(posix_class_maps)[];
+
+
+/* Set bits in classbits according to the property type.
+
+Arguments:
+  ptype      the property type (a PT_* code)
+  pdata      the property value that each code point is tested against
+  negated    TRUE to invert the per-code-point test
+  classbits  32-byte bitmap covering code points 0 to 255
+
+Bits are only ever set in classbits, never cleared. */
+
+void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
+  uint8_t *classbits);
+
+/* Compile the META codes from start_ptr...end_ptr, writing a single
+OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode.
+NOTE(review): no end_ptr parameter is declared; presumably the returned
+pointer marks where parsing stopped - confirm against the definition. */
+
+uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
+  uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
+  int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
+
+/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
+start at a META_CLASS or META_CLASS_NOT.
+ +The pptr will be left pointing at the matching META_CLASS_END. */ + +BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions, + uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr, + compile_block *cb, PCRE2_SIZE *lengthptr); + +#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */ + +/* End of pcre2_compile.h */ diff --git a/src/pcre2_compile_class.c b/src/pcre2_compile_class.c new file mode 100644 index 0000000..6a73bb9 --- /dev/null +++ b/src/pcre2_compile_class.c @@ -0,0 +1,2737 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_compile.h"
+/* Bundle of option bits, error/compile-block pointers and a bitmap flag used
+for eclass processing. NOTE(review): its users are not visible in this chunk. */
+typedef struct {
+  /* Option bits for eclass. */
+  uint32_t options;
+  uint32_t xoptions;
+  /* Rarely used members. */
+  int *errorcodeptr;
+  compile_block *cb;
+  /* Bitmap is needed. */
+  BOOL needs_bitmap;
+} eclass_context;
+
+/* Checks the allowed tokens at the end of a class structure in debug mode.
+When a new token is not processed by all loops, and the token is equal to
+a) one of the cases here:
+   the compiler will complain about a duplicated case value.
+b) none of the cases here:
+   the loop without the handler will stop with an assertion failure.
+Without PCRE2_DEBUG the macro expands to a bare "default:" label. */
+
+#ifdef PCRE2_DEBUG
+#define CLASS_END_CASES(meta) \
+  default: \
+  PCRE2_ASSERT((meta) <= META_END); \
+  /* Fall through */ \
+  case META_CLASS: \
+  case META_CLASS_NOT: \
+  case META_CLASS_EMPTY: \
+  case META_CLASS_EMPTY_NOT: \
+  case META_CLASS_END: \
+  case META_ECLASS_AND: \
+  case META_ECLASS_OR: \
+  case META_ECLASS_SUB: \
+  case META_ECLASS_XOR: \
+  case META_ECLASS_NOT:
+#else
+#define CLASS_END_CASES(meta) \
+  default:
+#endif
+
+#ifdef SUPPORT_WIDE_CHARS
+
+/* Heapsort algorithm.
*/
+/* do_heapify() sifts one item down a max-heap stored in buffer. Each item is
+two uint32_t words (buffer[k] and buffer[k + 1]) and items are ordered by
+their first word only. The size argument is the heap length counted in words,
+and i is the word index of the item to sift down. */
+static void do_heapify(uint32_t *buffer, size_t size, size_t i)
+{
+size_t max;
+size_t left;
+size_t right;
+uint32_t tmp1, tmp2;
+
+while (TRUE)
+  {
+  max = i;
+  left = (i << 1) + 2;   /* Word index of the first child item. */
+  right = left + 2;      /* Word index of the second child item. */
+
+  if (left < size && buffer[left] > buffer[max]) max = left;
+  if (right < size && buffer[right] > buffer[max]) max = right;
+  if (i == max) return;  /* Neither child is larger, so nothing to move. */
+
+  /* Swap items. */
+  tmp1 = buffer[i];
+  tmp2 = buffer[i + 1];
+  buffer[i] = buffer[max];
+  buffer[i + 1] = buffer[max + 1];
+  buffer[max] = tmp1;
+  buffer[max + 1] = tmp2;
+  i = max;               /* Continue sifting from the child's position. */
+  }
+}
+
+#ifdef SUPPORT_UNICODE
+/* Option bits passed to parse_class() and its helpers. */
+#define PARSE_CLASS_UTF               0x1
+#define PARSE_CLASS_CASELESS_UTF      0x2
+#define PARSE_CLASS_RESTRICTED_UTF    0x4
+#define PARSE_CLASS_TURKISH_UTF       0x8
+
+/* Get the range of nocase characters which includes the
+'c' character passed as argument, or directly follows 'c'.
+
+PRIV(ucd_nocase_ranges) is searched by bisection. The "middle" index is
+always forced to be odd so that it addresses a range end; the returned pointer
+(middle - 1) therefore addresses a (start, end) pair. For c above
+MAX_UTF_CODE_POINT the pointer PRIV(ucd_nocase_ranges) +
+PRIV(ucd_nocase_ranges_size) is returned; NOTE(review): presumably the table
+carries a terminating entry there - confirm against the table generator. */
+
+static const uint32_t*
+get_nocase_range(uint32_t c)
+{
+uint32_t left = 0;
+uint32_t right = PRIV(ucd_nocase_ranges_size);
+uint32_t middle;
+
+if (c > MAX_UTF_CODE_POINT) return PRIV(ucd_nocase_ranges) + right;
+
+while (TRUE)
+  {
+  /* Range end of the middle element. */
+  middle = ((left + right) >> 1) | 0x1;
+
+  if (PRIV(ucd_nocase_ranges)[middle] <= c)
+    left = middle + 1;    /* This range ends at or before c: search above it. */
+  else if (middle > 1 && PRIV(ucd_nocase_ranges)[middle - 2] > c)
+    right = middle - 1;   /* The previous range ends after c: search below. */
+  else
+    return PRIV(ucd_nocase_ranges) + (middle - 1);
+  }
+}
+
+/* Get the list of othercase characters, which belongs to the passed range.
+Create ranges from these characters, and append them to the buffer argument.
*/
+/* Arguments:
+  start      first character of the range
+  end        last character of the range
+  options    PARSE_CLASS_* bits
+  buffer     where to store (start, end) pairs, or NULL to only count
+
+Returns:     the number of uint32_t values stored (or that would be stored),
+             including the final pair that holds the original range, widened
+             by any othercase characters directly adjacent to it
+*/
+static size_t
+utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
+  uint32_t *buffer)
+{
+uint32_t new_start = start;
+uint32_t new_end = end;
+uint32_t c = start;
+const uint32_t *list;
+uint32_t tmp[3];
+size_t result = 2;   /* Accounts for the final (new_start, new_end) pair. */
+const uint32_t *skip_range = get_nocase_range(c);
+uint32_t skip_start = skip_range[0];
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+PCRE2_ASSERT(options & PARSE_CLASS_UTF);
+#endif
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
+#endif
+
+while (c <= end)
+  {
+  uint32_t co;
+
+  if (c > skip_start)
+    {
+    /* c is past the start of the current nocase range: jump to that range's
+    end value and advance to the next (start, end) pair of the table. */
+    c = skip_range[1];
+    skip_range += 2;
+    skip_start = skip_range[0];
+    continue;
+    }
+
+  /* Compute caseless set. */
+
+  if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) ==
+        PARSE_CLASS_TURKISH_UTF &&
+      UCD_ANY_I(c))
+    {
+    co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3);
+    }
+  else if ((co = UCD_CASESET(c)) != 0 &&
+           (options & PARSE_CLASS_RESTRICTED_UTF) != 0 &&
+           PRIV(ucd_caseless_sets)[co] < 128)
+    {
+    co = 0;  /* Ignore the caseless set if it's restricted. */
+    }
+
+  if (co != 0)
+    list = PRIV(ucd_caseless_sets) + co;
+  else
+    {
+    /* No caseless set: build a NOTACHAR-terminated list in tmp that holds c
+    and, when it differs from c, its single other case. */
+    co = UCD_OTHERCASE(c);
+    list = tmp;
+    tmp[0] = c;
+    tmp[1] = NOTACHAR;
+
+    if (co != c)
+      {
+      tmp[1] = co;
+      tmp[2] = NOTACHAR;
+      }
+    }
+  c++;
+
+  /* Add characters. Each "continue" below jumps to the while() test at the
+  bottom of the do loop, which advances list to the next entry. */
+  do
+    {
+#if PCRE2_CODE_UNIT_WIDTH == 16
+    if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue;
+#endif
+
+    if (*list < new_start)
+      {
+      if (*list + 1 == new_start)
+        {
+        new_start--;   /* Directly below the range: widen it downwards. */
+        continue;
+        }
+      }
+    else if (*list > new_end)
+      {
+      if (*list - 1 == new_end)
+        {
+        new_end++;     /* Directly above the range: widen it upwards. */
+        continue;
+        }
+      }
+    else continue;     /* Already inside the (widened) range. */
+
+    result += 2;       /* Emitted as a single-character range. */
+    if (buffer != NULL)
+      {
+      buffer[0] = *list;
+      buffer[1] = *list;
+      buffer += 2;
+      }
+    }
+  while (*(++list) != NOTACHAR);
+  }
+
+  if (buffer != NULL)
+    {
+    buffer[0] = new_start;
+    buffer[1] = new_end;
+    buffer += 2;
+    (void)buffer;
+    }
+  return result;
+}
+
+#endif
+
+/* Add a character list to a buffer.
*/
+/* The list p is terminated by NOTACHAR. Each run of consecutive values (every
+value one more than the previous) is stored as one (first, last) pair. When
+buffer is NULL nothing is stored. Returns the number of uint32_t values. */
+static size_t
+append_char_list(const uint32_t *p, uint32_t *buffer)
+{
+const uint32_t *n;
+size_t result = 0;
+
+while (*p != NOTACHAR)
+  {
+  n = p;
+  while (n[0] == n[1] - 1) n++;   /* Extend n to the last value of the run. */
+
+  PCRE2_ASSERT(*p < 0xffff);
+
+  if (buffer != NULL)
+    {
+    buffer[0] = *p;
+    buffer[1] = *n;
+    buffer += 2;
+    }
+
+  result += 2;
+  p = n + 1;
+  }
+
+  return result;
+}
+/* Returns the highest character value used as the upper bound of open-ended
+ranges: always MAX_UTF_CODE_POINT in the 8-bit library, otherwise chosen by
+the PARSE_CLASS_UTF bit (or MAX_UCHAR_VALUE without Unicode support). */
+static uint32_t
+get_highest_char(uint32_t options)
+{
+(void)options; /* Avoid compiler warning. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return MAX_UTF_CODE_POINT;
+#else
+#ifdef SUPPORT_UNICODE
+return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
+#else
+return MAX_UCHAR_VALUE;
+#endif
+#endif
+}
+
+/* Add a negated character list to a buffer. The gaps between the runs of the
+NOTACHAR-terminated list p are stored as (start, end) pairs, beginning at 0
+and finishing with a pair that ends at get_highest_char(options). When buffer
+is NULL nothing is stored. Returns the number of uint32_t values. */
+static size_t
+append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer)
+{
+const uint32_t *n;
+uint32_t start = 0;
+size_t result = 2;   /* Accounts for the final (start, highest) pair. */
+
+PCRE2_ASSERT(*p > 0);
+
+while (*p != NOTACHAR)
+  {
+  n = p;
+  while (n[0] == n[1] - 1) n++;   /* Extend n to the last value of the run. */
+
+  PCRE2_ASSERT(*p < 0xffff);
+
+  if (buffer != NULL)
+    {
+    buffer[0] = start;
+    buffer[1] = *p - 1;
+    buffer += 2;
+    }
+
+  result += 2;
+  start = *n + 1;
+  p = n + 1;
+  }
+
+  if (buffer != NULL)
+    {
+    buffer[0] = start;
+    buffer[1] = get_highest_char(options);
+    buffer += 2;
+    (void)buffer;
+    }
+
+  return result;
+}
+/* Stores the pair (0x100, highest character) and returns the advanced buffer
+pointer, or returns NULL untouched when buffer is NULL. The caller adds the
+two values to its own size count. */
+static uint32_t *
+append_non_ascii_range(uint32_t options, uint32_t *buffer)
+{
+  if (buffer == NULL) return NULL;
+
+  buffer[0] = 0x100;
+  buffer[1] = get_highest_char(options);
+  return buffer + 2;
+}
+/* Walks the parsed META codes of a class starting at ptr and produces the
+list of (start, end) character ranges that it contains.
+
+Arguments:
+  ptr        first parsed item inside the class
+  options    PARSE_CLASS_* bits
+  buffer     where to store the pairs, or NULL to only compute the size
+
+Returns:     the number of uint32_t values stored (or that would be stored);
+             scanning stops at the first item that reaches the default case
+             with a value >= META_END
+*/
+static size_t
+parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
+{
+size_t total_size = 0;
+size_t size;
+uint32_t meta_arg;
+uint32_t start_char;
+
+while (TRUE)
+  {
+  switch (META_CODE(*ptr))
+    {
+    case META_ESCAPE:
+      meta_arg = META_DATA(*ptr);
+      switch (meta_arg)
+        {
+        case ESC_D:
+        case ESC_W:
+        case ESC_S:
+        /* Only the range from 0x100 upwards is recorded here. */
+        buffer = append_non_ascii_range(options, buffer);
+        total_size += 2;
+        break;
+
+        case ESC_h:
+        size = append_char_list(PRIV(hspace_list), buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_H:
+        size = append_negated_char_list(PRIV(hspace_list), options, buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_v:
+        size = append_char_list(PRIV(vspace_list), buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_V:
+        size = append_negated_char_list(PRIV(vspace_list), options, buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_p:
+        case ESC_P:
+        ptr++;   /* Step onto the property word that follows the escape. */
+        if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
+          {
+          if (buffer != NULL)
+            {
+            buffer[0] = 0;
+            buffer[1] = get_highest_char(options);
+            buffer += 2;
+            }
+          total_size += 2;
+          }
+        break;
+        }
+      ptr++;
+      continue;
+    case META_POSIX_NEG:
+      buffer = append_non_ascii_range(options, buffer);
+      total_size += 2;
+      ptr += 2;
+      continue;
+    case META_POSIX:
+      ptr += 2;   /* Skipped without recording any range. */
+      continue;
+    case META_BIGVALUE:
+      /* Character literal */
+      ptr++;
+      break;
+    CLASS_END_CASES(*ptr)
+      if (*ptr >= META_END) return total_size;
+      break;
+    }
+
+  start_char = *ptr;
+
+  if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
+    {
+    ptr += 2;   /* Move to the end character of the range. */
+    PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);
+
+    if (*ptr == META_BIGVALUE) ptr++;
+
+#ifdef EBCDIC
+#error "Missing EBCDIC support"
+#endif
+    }
+
+#ifdef SUPPORT_UNICODE
+  if (options & PARSE_CLASS_CASELESS_UTF)
+    {
+    size = utf_caseless_extend(start_char, *ptr++, options, buffer);
+    if (buffer != NULL) buffer += size;
+    total_size += size;
+    continue;
+    }
+#endif
+
+  if (buffer != NULL)
+    {
+    buffer[0] = start_char;
+    buffer[1] = *ptr;   /* Same as start_char for a single character. */
+    buffer += 2;
+    }
+
+  ptr++;
+  total_size += 2;
+  }
+
+  return total_size;
+}
+
+/* Extra uint32_t values for storing the lengths of range lists in
+the worst case. Two uint32_t lengths and a range end for a range
+starting before 255 */
+#define CHAR_LIST_EXTRA_SIZE 3
+
+/* Starting character values for each character list.
*/
+/* The entries run from the highest starting value down to the lowest; the
+character list loop in compile_optimize_class() consumes them in this order. */
+static const uint32_t char_list_starts[] = {
+#if PCRE2_CODE_UNIT_WIDTH == 32
+  XCL_CHAR_LIST_HIGH_32_START,
+#endif
+#if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE
+  XCL_CHAR_LIST_LOW_32_START,
+#endif
+  XCL_CHAR_LIST_HIGH_16_START,
+  /* Must be terminated by XCL_CHAR_LIST_LOW_16_START,
+  which also represents the end of the bitset. */
+  XCL_CHAR_LIST_LOW_16_START,
+};
+/* Build the class_ranges block for one character class. parse_class() is run
+twice (first with a NULL buffer to size the allocation, then to fill it); the
+resulting (start, end) pairs are heap-sorted by their start value and merged
+where they overlap or touch. If enough ranges reach 0x100 or above, the end of
+the allocation is additionally filled with character lists, written downwards.
+
+Arguments:
+  start_ptr  first parsed item inside the class
+  options    the options bits
+  xoptions   the extra options bits
+  cb         compile data (supplies the memory allocator)
+
+Returns:     the new class_ranges block, or NULL if the allocation failed
+*/
+static class_ranges *
+compile_optimize_class(uint32_t *start_ptr, uint32_t options,
+  uint32_t xoptions, compile_block *cb)
+{
+class_ranges* cranges;
+uint32_t *ptr;
+uint32_t *buffer;
+uint32_t *dst;
+uint32_t class_options = 0;
+size_t range_list_size = 0, total_size, i;
+uint32_t tmp1, tmp2;
+const uint32_t *char_list_next;
+uint16_t *next_char;
+uint32_t char_list_start, char_list_end;
+uint32_t range_start, range_end;
+
+#ifdef SUPPORT_UNICODE
+if (options & PCRE2_UTF)
+  class_options |= PARSE_CLASS_UTF;
+
+if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
+  class_options |= PARSE_CLASS_CASELESS_UTF;
+
+if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT)
+  class_options |= PARSE_CLASS_RESTRICTED_UTF;
+
+if (xoptions & PCRE2_EXTRA_TURKISH_CASING)
+  class_options |= PARSE_CLASS_TURKISH_UTF;
+#endif
+
+/* Compute required space for the range. */
+
+range_list_size = parse_class(start_ptr, class_options, NULL);
+PCRE2_ASSERT((range_list_size & 0x1) == 0);
+
+/* Allocate buffer. The total_size also represents the end of the buffer. */
+
+total_size = range_list_size +
+   ((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0);
+
+cranges = cb->cx->memctl.malloc(
+  sizeof(class_ranges) + total_size * sizeof(uint32_t),
+  cb->cx->memctl.memory_data);
+
+if (cranges == NULL) return NULL;
+
+cranges->next = NULL;
+cranges->range_list_size = (uint16_t)range_list_size;
+cranges->char_lists_types = 0;
+cranges->char_lists_size = 0;
+cranges->char_lists_start = 0;
+
+if (range_list_size == 0) return cranges;
+/* The range data is stored directly after the class_ranges header. */
+buffer = (uint32_t*)(cranges + 1);
+parse_class(start_ptr, class_options, buffer);
+
+/* Using <= instead of == to help static analysis. */
+if (range_list_size <= 2) return cranges;
+
+/* In-place sorting of ranges. First build a max-heap, starting from the last
+item that has a child and working back to the root. */
+
+i = (((range_list_size >> 2) - 1) << 1);
+while (TRUE)
+  {
+  do_heapify(buffer, range_list_size, i);
+  if (i == 0) break;
+  i -= 2;
+  }
+/* Then repeatedly swap the root (largest start) with the last item of the
+shrinking heap and restore the heap property for the remaining items. */
+i = range_list_size - 2;
+while (TRUE)
+  {
+  tmp1 = buffer[i];
+  tmp2 = buffer[i + 1];
+  buffer[i] = buffer[0];
+  buffer[i + 1] = buffer[1];
+  buffer[0] = tmp1;
+  buffer[1] = tmp2;
+
+  do_heapify(buffer, i, 0);
+  if (i == 0) break;
+  i -= 2;
+  }
+
+/* Merge ranges whenever possible. dst addresses the last merged range and
+ptr the next sorted range to be examined. */
+dst = buffer;
+ptr = buffer + 2;
+range_list_size -= 2;
+
+/* The second condition is a very rare corner case, where the end of the last
+range is the maximum character. This range cannot be extended further. */
+
+while (range_list_size > 0 && dst[1] != ~(uint32_t)0)
+  {
+  if (dst[1] + 1 < ptr[0])
+    {
+    dst += 2;            /* A gap separates the ranges: start a new one. */
+    dst[0] = ptr[0];
+    dst[1] = ptr[1];
+    }
+  else if (dst[1] < ptr[1]) dst[1] = ptr[1];   /* Overlap or touch: extend. */
+
+  ptr += 2;
+  range_list_size -= 2;
+  }
+
+PCRE2_ASSERT(dst[1] <= get_highest_char(class_options));
+
+/* When the number of ranges is less than six, they are not converted to
+range lists. Leading ranges that end below 0x100 are skipped before the
+remaining ranges are counted. */
+
+ptr = buffer;
+while (ptr < dst && ptr[1] < 0x100) ptr += 2;
+if (dst - ptr < (2 * (6 - 1)))
+  {
+  cranges->range_list_size = (uint16_t)(dst + 2 - buffer);
+  return cranges;
+  }
+
+/* Compute character lists structures. The lists are written downwards from
+the end of the allocation (next_char is decremented before each store), and
+the merged ranges are consumed from the last one (dst) back towards buffer.
+Within the loop tmp1 counts the items of the current list and tmp2 is the
+bit offset of that list's field in cranges->char_lists_types. */
+
+char_list_next = char_list_starts;
+char_list_start = *char_list_next++;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+char_list_end = XCL_CHAR_LIST_HIGH_32_END;
+#elif defined SUPPORT_UNICODE
+char_list_end = XCL_CHAR_LIST_LOW_32_END;
+#else
+char_list_end = XCL_CHAR_LIST_HIGH_16_END;
+#endif
+next_char = (uint16_t*)(buffer + total_size);
+
+tmp1 = 0;
+tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN;
+PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN);
+range_start = dst[0];
+range_end = dst[1];
+
+while (TRUE)
+  {
+  if (range_start >= char_list_start)
+    {
+    /* The whole range belongs to the current character list. */
+    if (range_start == range_end || range_end < char_list_end)
+      {
+      tmp1++;
+      next_char--;
+
+      if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+        *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
+      else
+        *(uint32_t*)(--next_char) =
+          (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
+      }
+
+    if (range_start < range_end)
+      {
+      if (range_start > char_list_start)
+        {
+        tmp1++;
+        next_char--;
+
+        if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+          *next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT);
+        else
+          *(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT);
+        }
+      else
+        cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
+      }
+
+    PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
+
+    if (dst > buffer)
+      {
+      dst -= 2;          /* Step back to the previous merged range. */
+      range_start = dst[0];
+      range_end = dst[1];
+      continue;
+      }
+
+    range_start = 0;     /* All merged ranges have been consumed. */
+    range_end = 0;
+    }
+
+  if (range_end >= char_list_start)
+    {
+    /* The range starts below this list but ends inside or above it. */
+    PCRE2_ASSERT(range_start < char_list_start);
+
+    if (range_end < char_list_end)
+      {
+      tmp1++;
+      next_char--;
+
+      if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+        *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
+      else
+        *(uint32_t*)(--next_char) =
+          (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
+
+      PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
+      }
+
+    cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
+    }
+
+  if (tmp1 >= XCL_ITEM_COUNT_MASK)
+    {
+    /* The item count does not fit into the type field: mark the field as
+    saturated and store the real count in front of the list. */
+    cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2;
+    next_char--;
+
+    if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+      *next_char = (uint16_t)tmp1;
+    else
+      *(uint32_t*)(--next_char) = tmp1;
+    }
+  else
+    cranges->char_lists_types |= tmp1 << tmp2;
+
+  if (range_start < XCL_CHAR_LIST_LOW_16_START) break;
+
+  PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
+  char_list_end = char_list_start - 1;   /* The next list ends just below. */
+  char_list_start = *char_list_next++;
+  tmp1 = 0;
+  tmp2 -= XCL_TYPE_BIT_LEN;
+  }
+
+if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2;
+PCRE2_ASSERT((uint16_t*)dst <= next_char);
+
+cranges->char_lists_size =
+  (size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char);
+cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer);
+cranges->range_list_size = (uint16_t)(dst - buffer);
+return cranges;
+}
+
+#endif /* SUPPORT_WIDE_CHARS */
+
+#ifdef SUPPORT_UNICODE
+/* Set bits in classbits, a 32-byte bitmap for code points 0 to 255, for every
+code point that has the property (ptype, pdata), or that lacks it when negated
+is TRUE. Bits are only ever set, never cleared. */
+void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
+  uint8_t *classbits)
+{
+/* Update PRIV(xclass) when this function is changed. */
+int c, chartype;
+const ucd_record *prop;
+uint32_t gentype;
+BOOL set_bit;
+
+if (ptype == PT_ANY)
+  {
+  if (!negated) memset(classbits, 0xff, 32);   /* Negated PT_ANY adds no bits. */
+  return;
+  }
+
+for (c = 0; c < 256; c++)
+  {
+  prop = GET_UCD(c);
+  set_bit = FALSE;
+  (void)set_bit;   /* NOTE(review): presumably silences a warning - confirm. */
+
+  switch (ptype)
+    {
+    case PT_LAMP:
+    chartype = prop->chartype;
+    set_bit = (chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt);
+    break;
+
+    case PT_GC:
+    set_bit = (PRIV(ucp_gentype)[prop->chartype] == pdata);
+    break;
+
+    case PT_PC:
+    set_bit = (prop->chartype == pdata);
+    break;
+
+    case PT_SC:
+    set_bit = (prop->script == pdata);
+    break;
+
+    case PT_SCX:
+    set_bit = (prop->script == pdata ||
+      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
+    break;
+
+    case PT_ALNUM:
+    gentype = PRIV(ucp_gentype)[prop->chartype];
+    set_bit = (gentype == ucp_L || gentype == ucp_N);
+    break;
+
+    case PT_SPACE:    /* Perl space */
+    case PT_PXSPACE:  /* POSIX space */
+    switch(c)
+      {
+      HSPACE_BYTE_CASES:
+      VSPACE_BYTE_CASES:
+      set_bit = TRUE;
+      break;
+
+      default:
+      set_bit = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z);
+      break;
+      }
+    break;
+
+    case PT_WORD:
+    chartype = prop->chartype;
+    gentype = PRIV(ucp_gentype)[chartype];
+    set_bit = (gentype == ucp_L || gentype == ucp_N ||
+               chartype == ucp_Mn || chartype == ucp_Pc);
+    break;
+
+    case PT_UCNC:
+    set_bit = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || c >= 0xa0);
+    break;
+
+    case PT_BIDICL:
+    set_bit = (UCD_BIDICLASS_PROP(prop) == pdata);
+    break;
+
+    case PT_BOOL:
+    set_bit = MAPBIT(PRIV(ucd_boolprop_sets) +
+                     UCD_BPROPS_PROP(prop), pdata) != 0;
+    break;
+
+    case PT_PXGRAPH:
+    chartype = prop->chartype;
+    gentype = PRIV(ucp_gentype)[chartype];
+    set_bit = (gentype != ucp_Z && (gentype != ucp_C || chartype == ucp_Cf));
+    break;
+
+    case PT_PXPRINT:
+    chartype = prop->chartype;
+    set_bit = (chartype != ucp_Zl && chartype != ucp_Zp &&
+       (PRIV(ucp_gentype)[chartype] != ucp_C || chartype == ucp_Cf));
+    break;
+
+    case PT_PXPUNCT:
+    gentype = PRIV(ucp_gentype)[prop->chartype];
+    set_bit = (gentype == ucp_P || (c < 128 && gentype == ucp_S));
+    break;
+
+    default:
+    PCRE2_ASSERT(ptype == PT_PXXDIGIT);
+    set_bit = (c >= CHAR_0 && c <= CHAR_9) ||
+              (c >= CHAR_A && c <= CHAR_F) ||
+              (c >= CHAR_a && c <= CHAR_f);
+    break;
+    }
+
+  if (negated) set_bit = !set_bit;
+  if (set_bit) *classbits |= (uint8_t)(1 << (c & 0x7));
+  if ((c & 0x7) == 0x7) classbits++;   /* Next bitmap byte after 8 code points. */
+  }
+}
+
+#endif /* SUPPORT_UNICODE */
+
+
+
+#ifdef SUPPORT_WIDE_CHARS
+
+/*************************************************
+*           XClass related properties            *
+*************************************************/
+
+/* XClass needs to be generated. */
+#define XCLASS_REQUIRED 0x1
+/* XClass has 8 bit character. */
+#define XCLASS_HAS_8BIT_CHARS 0x2
+/* XClass has properties. */
+#define XCLASS_HAS_PROPS 0x4
+/* XClass has character lists. */
+#define XCLASS_HAS_CHAR_LISTS 0x8
+/* XClass matches all characters >= 256. */
+#define XCLASS_HIGH_ANY 0x10
+
+#endif
+
+
+/*************************************************
+*   Internal entry point for add range to class  *
+*************************************************/
+
+/* This function sets the overall range for characters < 256.
+It also handles non-utf case folding.
+
+Arguments:
+  options       the options bits
+  xoptions      the extra options bits
+  cb            compile data
+  start         start of range character
+  end           end of range character
+
+Returns:        cb->classbits is updated
+*/
+
+static void
+add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
+  uint32_t start, uint32_t end)
+{
+uint8_t *classbits = cb->classbits.classbits;
+uint32_t c, byte_start, byte_end;
+uint32_t classbits_end = (end <= 0xff ? end : 0xff);
+
+/* If caseless matching is required, scan the range and process alternate
+cases. In Unicode, there are 8-bit characters that have alternate cases that
+are greater than 255 and vice-versa (though these may be ignored if caseless
+restriction is in force).
Sometimes we can just extend the original range. */ + +if ((options & PCRE2_CASELESS) != 0) + { +#ifdef SUPPORT_UNICODE + /* UTF mode. This branch is taken if we don't support wide characters (e.g. + 8-bit library, without UTF), but we do treat those characters as Unicode + (if UCP flag is set). In this case, we only need to expand the character class + set to include the case pairs which are in the 0-255 codepoint range. */ + if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0) + { + BOOL turkish_i = (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) == + PCRE2_EXTRA_TURKISH_CASING; + if (start < 128) + { + uint32_t lo_end = (classbits_end < 127 ? classbits_end : 127); + for (c = start; c <= lo_end; c++) + { + if (turkish_i && UCD_ANY_I(c)) continue; + SETBIT(classbits, cb->fcc[c]); + } + } + if (classbits_end >= 128) + { + uint32_t hi_start = (start > 128 ? start : 128); + for (c = hi_start; c <= classbits_end; c++) + { + uint32_t co = UCD_OTHERCASE(c); + if (co <= 0xff) SETBIT(classbits, co); + } + } + } + + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (c = start; c <= classbits_end; c++) + SETBIT(classbits, cb->fcc[c]); + } + } + +/* Use the bitmap for characters < 256. Otherwise use extra data. */ + +byte_start = (start + 7) >> 3; +byte_end = (classbits_end + 1) >> 3; + +if (byte_start >= byte_end) + { + for (c = start; c <= classbits_end; c++) + /* Regardless of start, c will always be <= 255. 
*/ + SETBIT(classbits, c); + return; + } + +for (c = byte_start; c < byte_end; c++) + classbits[c] = 0xff; + +byte_start <<= 3; +byte_end <<= 3; + +for (c = start; c < byte_start; c++) + SETBIT(classbits, c); + +for (c = byte_end; c <= classbits_end; c++) + SETBIT(classbits, c); +} + + +#if PCRE2_CODE_UNIT_WIDTH == 8 +/************************************************* +* Internal entry point for add list to class * +*************************************************/ + +/* This function is used for adding a list of horizontal or vertical whitespace +characters to a class. The list must be in order so that ranges of characters +can be detected and handled appropriately. This function sets the overall range +so that the internal functions can try to avoid duplication when handling +case-independence. + +Arguments: + options the options bits + xoptions the extra options bits + cb contains pointers to tables etc. + p points to row of 32-bit values, terminated by NOTACHAR + +Returns: cb->classbits is updated +*/ + +static void +add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb, + const uint32_t *p) +{ +while (p[0] < 256) + { + unsigned int n = 0; + + while(p[n+1] == p[0] + n + 1) n++; + add_to_class(options, xoptions, cb, p[0], p[n]); + + p += n + 1; + } +} + + + +/************************************************* +* Add characters not in a list to a class * +*************************************************/ + +/* This function is used for adding the complement of a list of horizontal or +vertical whitespace to a class. The list must be in order. + +Arguments: + options the options bits + xoptions the extra options bits + cb contains pointers to tables etc. 
+ p points to row of 32-bit values, terminated by NOTACHAR + +Returns: cb->classbits is updated +*/ + +static void +add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb, + const uint32_t *p) +{ +if (p[0] > 0) + add_to_class(options, xoptions, cb, 0, p[0] - 1); +while (p[0] < 256) + { + while (p[1] == p[0] + 1) p++; + add_to_class(options, xoptions, cb, p[0] + 1, (p[1] > 255) ? 255 : p[1] - 1); + p++; + } +} +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + + + +/************************************************* +* Main entry-point to compile a character class * +*************************************************/ + +/* This function consumes a "leaf", which is a set of characters that will +become a single OP_CLASS OP_NCLASS, OP_XCLASS, or OP_ALLANY. */ + +uint32_t * +PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions, + uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap, + int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr) +{ +uint32_t *pptr = start_ptr; +PCRE2_UCHAR *code = *pcode; +BOOL should_flip_negation; +const uint8_t *cbits = cb->cbits; +/* Some functions such as add_to_class() or eclass processing +expects that the bitset is stored in cb->classbits.classbits. */ +uint8_t *const classbits = cb->classbits.classbits; + +#ifdef SUPPORT_UNICODE +BOOL utf = (options & PCRE2_UTF) != 0; +#else /* No Unicode support */ +BOOL utf = FALSE; +#endif + +/* Helper variables for OP_XCLASS opcode (for characters > 255). */ + +#ifdef SUPPORT_WIDE_CHARS +uint32_t xclass_props; +PCRE2_UCHAR *class_uchardata; +class_ranges* cranges; +#endif + +/* If an XClass contains a negative special such as \S, we need to flip the +negation flag at the end, so that support for characters > 255 works correctly +(they are all included in the class). An XClass may need to insert specific +matching or non-matching code for wide characters. 
/*************************************************
*   Main entry-point to compile a character class *
*************************************************/

/* This function consumes a "leaf", which is a set of characters that will
become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY.

Arguments:
  options        the options bits
  xoptions       the extra options bits
  start_ptr      first parsed-pattern item of the class contents
  pcode          in/out: where the item is emitted; on success it is advanced
                   past the emitted item
  negate_class   TRUE if the class as a whole is negated
  has_bitmap     consulted only when an OP_XCLASS is produced: if not NULL the
                   bitmap is not copied into the code (it is left in
                   cb->classbits) and *has_bitmap is set TRUE when the class
                   has characters < 256; it is never set to FALSE here
  errorcodeptr   receives ERR21 (compile_optimize_class() returned NULL) or
                   ERR20 (pattern too large) on failure
  cb             compile data; cb->classbits is the working bitmap
  lengthptr      if not NULL, XCLASS properties, ranges and character lists
                   are not written; their size is added to *lengthptr instead

Returns:         pointer to the last parsed-pattern item consumed (the item
                   that ended the class), or NULL after setting *errorcodeptr
*/

uint32_t *
PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
  uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
  int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
{
uint32_t *pptr = start_ptr;
PCRE2_UCHAR *code = *pcode;
BOOL should_flip_negation;
const uint8_t *cbits = cb->cbits;
/* Some functions such as add_to_class() or eclass processing
expect that the bitset is stored in cb->classbits.classbits. */
uint8_t *const classbits = cb->classbits.classbits;

#ifdef SUPPORT_UNICODE
BOOL utf = (options & PCRE2_UTF) != 0;
#else  /* No Unicode support */
BOOL utf = FALSE;
#endif

/* Helper variables for OP_XCLASS opcode (for characters > 255). */

#ifdef SUPPORT_WIDE_CHARS
uint32_t xclass_props;
PCRE2_UCHAR *class_uchardata;
class_ranges* cranges;
#endif

/* If an XClass contains a negative special such as \S, we need to flip the
negation flag at the end, so that support for characters > 255 works correctly
(they are all included in the class). An XClass may need to insert specific
matching or non-matching code for wide characters.
*/

should_flip_negation = FALSE;

/* XClass will be used when characters > 255 might match. */

#ifdef SUPPORT_WIDE_CHARS
xclass_props = 0;

/* In the 8-bit library the pre-processed ranges are used only in UTF mode;
in the 16-bit and 32-bit libraries the block below is unconditional. */

#if PCRE2_CODE_UNIT_WIDTH == 8
cranges = NULL;

if (utf)
#endif
  {
  if (lengthptr != NULL)
    {
    cranges = compile_optimize_class(pptr, options, xoptions, cb);

    if (cranges == NULL)
      {
      *errorcodeptr = ERR21;
      return NULL;
      }

    /* Caching the pre-processed character ranges. */
    if (cb->next_cranges != NULL)
      cb->next_cranges->next = cranges;
    else
      cb->cranges = cranges;

    cb->next_cranges = cranges;
    }
  else
    {
    /* Reuse the pre-processed character ranges. */
    cranges = cb->cranges;
    PCRE2_ASSERT(cranges != NULL);
    cb->cranges = cranges->next;
    }

  if (cranges->range_list_size > 0)
    {
    /* The range list is stored directly after the class_ranges header. */
    const uint32_t *ranges = (const uint32_t*)(cranges + 1);

    if (ranges[0] <= 255)
      xclass_props |= XCLASS_HAS_8BIT_CHARS;

    /* The final range reaches the maximum character and starts at or below
    256, so every character >= 256 is matched. */
    if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
        ranges[cranges->range_list_size - 2] <= 256)
      xclass_props |= XCLASS_HIGH_ANY;
    }
  }

class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
#endif /* SUPPORT_WIDE_CHARS */

/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
in a temporary bit of memory, in case the class contains fewer than two
8-bit characters because in that case the compiled code doesn't use the bit
map. */

memset(classbits, 0, 32);

/* Process items until the end of the class contents is reached. The loop is
left through END_PROCESSING when CLASS_END_CASES meets a non-literal item. */

while (TRUE)
  {
  uint32_t meta = *(pptr++);
  BOOL local_negate;
  int posix_class;
  int taboffset, tabopt;
  class_bits_storage pbits;
  uint32_t escape, c;

  /* Handle POSIX classes such as [:alpha:] etc. */
  switch (META_CODE(meta))
    {
    case META_POSIX:
    case META_POSIX_NEG:

    local_negate = (meta == META_POSIX_NEG);
    posix_class = *(pptr++);

    if (local_negate) should_flip_negation = TRUE;  /* Note negative special */

    /* If matching is caseless, upper and lower are converted to alpha.
    This relies on the fact that the class table starts with alpha,
    lower, upper as the first 3 entries. */

    if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
      posix_class = 0;

    /* When PCRE2_UCP is set, some of the POSIX classes are converted to
    different escape sequences that use Unicode properties \p or \P.
    Others that are not available via \p or \P have to generate
    XCL_PROP/XCL_NOTPROP directly, which is done here. */

#ifdef SUPPORT_UNICODE
    /* TODO This entire block of code here appears to be unreachable!? I simply
    can't see how it can be hit, given that the frontend parser doesn't emit
    META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */
    if ((options & PCRE2_UCP) != 0 &&
        (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
      {
      uint32_t ptype;

      switch(posix_class)
        {
        case PC_GRAPH:
        case PC_PRINT:
        case PC_PUNCT:
        ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
                (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;

        PRIV(update_classbits)(ptype, 0, local_negate, classbits);

        /* A property item is needed only if wide characters are not already
        all matched. It occupies three code units. */
        if ((xclass_props & XCLASS_HIGH_ANY) == 0)
          {
          if (lengthptr != NULL)
            *lengthptr += 3;
          else
            {
            *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
            *class_uchardata++ = (PCRE2_UCHAR)ptype;
            *class_uchardata++ = 0;
            }
          xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
          }
        continue;

        /* For the other POSIX classes (ex: ascii) we are going to
        fall through to the non-UCP case and build a bit map for
        characters with code points less than 256. However, if we are in
        a negated POSIX class, characters with code points greater than
        255 must either all match or all not match, depending on whether
        the whole class is not or is negated. For example, for
        [[:^ascii:]... they must all match, whereas for [^[:^ascii:]...
        they must not.

        In the special case where there are no xclass items, this is
        automatically handled by the use of OP_CLASS or OP_NCLASS, but an
        explicit range is needed for OP_XCLASS. Setting a flag here
        causes the range to be generated later when it is known that
        OP_XCLASS is required. In the 8-bit library this is relevant only in
        utf mode, since no wide characters can exist otherwise. */

        default:
        break;
        }
      }
#endif  /* SUPPORT_UNICODE */

    /* In the non-UCP case, or when UCP makes no difference, we build the
    bit map for the POSIX class in a chunk of local store because we may
    be adding and subtracting from it, and we don't want to subtract bits
    that may be in the main map already. At the end we or the result into
    the bit map that is being built. */

    /* Each POSIX class has three entries in PRIV(posix_class_maps). */
    posix_class *= 3;

    /* Copy in the first table (always present) */

    memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);

    /* If there is a second table, add or remove it as required. */

    taboffset = PRIV(posix_class_maps)[posix_class + 1];
    tabopt = PRIV(posix_class_maps)[posix_class + 2];

    if (taboffset >= 0)
      {
      if (tabopt >= 0)
        for (int i = 0; i < 32; i++)
          pbits.classbits[i] |= cbits[i + taboffset];
      else
        for (int i = 0; i < 32; i++)
          pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]);
      }

    /* Now see if we need to remove any special characters. An option
    value of 1 removes vertical space and 2 removes underscore. */

    if (tabopt < 0) tabopt = -tabopt;
    if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
      else if (tabopt == 2) pbits.classbits[11] &= 0x7f;

    /* Add the POSIX table or its complement into the main table that is
    being built and we are done. */

      {
      uint32_t *classwords = cb->classbits.classwords;

      if (local_negate)
        for (int i = 0; i < 8; i++)
          classwords[i] |= (uint32_t)(~pbits.classwords[i]);
      else
        for (int i = 0; i < 8; i++)
          classwords[i] |= pbits.classwords[i];
      }

#ifdef SUPPORT_WIDE_CHARS
    /* Every class contains at least one < 256 character. */
    xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif
    continue;    /* End of POSIX handling */

    /* Other than POSIX classes, the only items we should encounter are
    \d-type escapes and literal characters (possibly as ranges). */
    case META_BIGVALUE:
    meta = *(pptr++);
    break;

    case META_ESCAPE:
    escape = META_DATA(meta);

    switch(escape)
      {
      case ESC_d:
      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
      break;

      case ESC_D:
      should_flip_negation = TRUE;
      for (int i = 0; i < 32; i++)
        classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
      break;

      case ESC_w:
      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
      break;

      case ESC_W:
      should_flip_negation = TRUE;
      for (int i = 0; i < 32; i++)
        classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
      break;

      /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
      5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
      previously set by something earlier in the character class.
      Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
      we could just adjust the appropriate bit. From PCRE 8.34 we no
      longer treat \s and \S specially. */

      case ESC_s:
      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
      break;

      case ESC_S:
      should_flip_negation = TRUE;
      for (int i = 0; i < 32; i++)
        classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
      break;

      /* When adding the horizontal or vertical space lists to a class, or
      their complements, disable PCRE2_CASELESS, because it just wastes
      time, and in the "not-x" UTF cases can create unwanted duplicates in
      the XCLASS list (provoked by characters that have more than one other
      case and by both cases being in the same "not-x" sublist).

      In each of the four cases below nothing is added here when cranges is
      present. */

      case ESC_h:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(hspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      case ESC_H:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(hspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      case ESC_v:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(vspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      case ESC_V:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(vspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      /* If Unicode is not supported, \P and \p are not allowed and are
      faulted at parse time, so will never appear here. */

#ifdef SUPPORT_UNICODE
      case ESC_p:
      case ESC_P:
        {
        /* The next parsed item packs the property type in its upper 16 bits
        and the property data in its lower 16 bits. */
        uint32_t ptype = *pptr >> 16;
        uint32_t pdata = *(pptr++) & 0xffff;

        /* The "Any" is processed by PRIV(update_classbits)(). */
        if (ptype == PT_ANY)
          {
#if PCRE2_CODE_UNIT_WIDTH == 8
          if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
#endif
          continue;
          }

        PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);

        if ((xclass_props & XCLASS_HIGH_ANY) == 0)
          {
          if (lengthptr != NULL)
            *lengthptr += 3;
          else
            {
            *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
            *class_uchardata++ = ptype;
            *class_uchardata++ = pdata;
            }
          xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
          }
        }
      continue;
#endif
      }

#ifdef SUPPORT_WIDE_CHARS
    /* Every non-property class contains at least one < 256 character. */
    xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif
    /* End handling \d-type escapes */
    continue;

    CLASS_END_CASES(meta)
    /* Literals. */
    if (meta < META_END) break;
    /* Non-literals: end of class contents. */
    goto END_PROCESSING;
    }

  /* A literal character may be followed by a range meta. At parse time
  there are checks for out-of-order characters, for ranges where the two
  characters are equal, and for hyphens that cannot indicate a range. At
  this point, therefore, no checking is needed. */

  c = meta;

  /* Remember if \r or \n were explicitly used */

  if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;

  /* Process a character range */

  if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED)
    {
    uint32_t d;

#ifdef EBCDIC
    BOOL range_is_literal = (*pptr == META_RANGE_LITERAL);
#endif
    ++pptr;
    d = *(pptr++);
    if (d == META_BIGVALUE) d = *(pptr++);

    /* Remember an explicit \r or \n, and add the range to the class. */

    if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;

#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
    /* With cranges present the range is added from cranges after the loop. */
    if (cranges != NULL) continue;
    xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif

    /* In an EBCDIC environment, Perl treats alphabetic ranges specially
    because there are holes in the encoding, and simply using the range
    A-Z (for example) would include the characters in the holes. This
    applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */

#ifdef EBCDIC
    if (range_is_literal &&
        (cb->ctypes[c] & ctype_letter) != 0 &&
        (cb->ctypes[d] & ctype_letter) != 0 &&
        (c <= CHAR_z) == (d <= CHAR_z))
      {
      uint32_t uc = (d <= CHAR_z)? 0 : 64;
      uint32_t C = c - uc;
      uint32_t D = d - uc;

      if (C <= CHAR_i)
        {
        add_to_class(options, xoptions, cb, C + uc,
          ((D < CHAR_i)? D : CHAR_i) + uc);
        C = CHAR_j;
        }

      if (C <= D && C <= CHAR_r)
        {
        add_to_class(options, xoptions, cb, C + uc,
          ((D < CHAR_r)? D : CHAR_r) + uc);
        C = CHAR_s;
        }

      if (C <= D)
        add_to_class(options, xoptions, cb, C + uc, D + uc);
      }
    else
#endif
    /* Not an EBCDIC special range */

    add_to_class(options, xoptions, cb, c, d);
#else
    PCRE2_ASSERT(cranges != NULL);
#endif
    continue;
    }  /* End of range handling */

  /* Single characters are ignored here when class_ranges is present. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
  if (cranges != NULL) continue;
  xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif
  /* Handle a single character. */

  add_to_class(options, xoptions, cb, meta, meta);
#else
  PCRE2_ASSERT(cranges != NULL);
#endif
  }   /* End of main class-processing loop */

END_PROCESSING:

#ifdef SUPPORT_WIDE_CHARS
/* Property items are never emitted once XCLASS_HIGH_ANY is set, so the two
flags cannot both be present. */
PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
             (xclass_props & XCLASS_HIGH_ANY) == 0);

if (cranges != NULL)
  {
  /* The range list is a sequence of (start, end) pairs. */
  uint32_t *range = (uint32_t*)(cranges + 1);
  uint32_t *end = range + cranges->range_list_size;

  while (range < end && range[0] < 256)
    {
    PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
    /* Add range to bitset. If we are in UTF or UCP mode, then clear the
    caseless bit, because the cranges handle caselessness (only) in this
    condition; see the condition for PARSE_CLASS_CASELESS_UTF in
    compile_optimize_class(). */
    add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)?
        (options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]);

    /* A range that crosses 255 is left in place: its upper part is handled
    by the code below. */
    if (range[1] > 255) break;
    range += 2;
    }

  if (cranges->char_lists_size > 0)
    {
    /* The cranges structure is still used and freed later. */
    PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0);
    xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS;
    }
  else
    {
    if ((xclass_props & XCLASS_HIGH_ANY) != 0)
      {
      PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
        range[1] >= GET_MAX_CHAR_VALUE(utf));
      should_flip_negation = TRUE;
      range = end;
      }

    while (range < end)
      {
      uint32_t range_start = range[0];
      uint32_t range_end = range[1];

      range += 2;
      xclass_props |= XCLASS_REQUIRED;

      /* The part below 256 is already in the bitmap. */
      if (range_start < 256) range_start = 256;

      if (lengthptr != NULL)
        {
#ifdef SUPPORT_UNICODE
        if (utf)
          {
          *lengthptr += 1;

          /* PRIV(ord2utf)() is called only for the length it returns;
          class_uchardata is not advanced in this branch. */
          if (range_start < range_end)
            *lengthptr += PRIV(ord2utf)(range_start, class_uchardata);

          *lengthptr += PRIV(ord2utf)(range_end, class_uchardata);
          continue;
          }
#endif  /* SUPPORT_UNICODE */

        *lengthptr += range_start < range_end ? 3 : 2;
        continue;
        }

#ifdef SUPPORT_UNICODE
      if (utf)
        {
        if (range_start < range_end)
          {
          *class_uchardata++ = XCL_RANGE;
          class_uchardata += PRIV(ord2utf)(range_start, class_uchardata);
          }
        else
          *class_uchardata++ = XCL_SINGLE;

        class_uchardata += PRIV(ord2utf)(range_end, class_uchardata);
        continue;
        }
#endif  /* SUPPORT_UNICODE */

      /* Without UTF support, character values are constrained
      by the bit length, and can only be > 256 for 16-bit and
      32-bit libraries. */
#if PCRE2_CODE_UNIT_WIDTH != 8
      if (range_start < range_end)
        {
        *class_uchardata++ = XCL_RANGE;
        *class_uchardata++ = range_start;
        }
      else
        *class_uchardata++ = XCL_SINGLE;

      *class_uchardata++ = range_end;
#endif  /* PCRE2_CODE_UNIT_WIDTH != 8 */
      }

    if (lengthptr == NULL)
      cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
    }
  }
#endif /* SUPPORT_WIDE_CHARS */

/* If there are characters with values > 255, or Unicode property settings
(\p or \P), we have to compile an extended class, with its own opcode,
unless there were no property settings and there was a negated special such
as \S in the class, and PCRE2_UCP is not set, because in that case all
characters > 255 are in or not in the class, so any that were explicitly
given as well can be ignored.

In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were
present in a class, we either have to match or not match all wide
characters (depending on whether the whole class is or is not negated).
This requirement is indicated by match_all_or_no_wide_chars being true.
We do this by including an explicit range, which works in both cases.
This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
cannot be any wide characters in 8-bit non-UTF mode.

When there *are* properties in a positive UTF-8 or any 16-bit or 32-bit
class where \S etc is present without PCRE2_UCP, causing an extended class
to be compiled, we make sure that all characters > 255 are included by
forcing match_all_or_no_wide_chars to be true.

If, when generating an xclass, there are no characters < 256, we can omit
the bitmap in the actual compiled code.

NOTE(review): match_all_or_no_wide_chars is not a variable in this function;
the two paragraphs that mention it appear to describe earlier logic. The
decision below is driven by the XCLASS_* bits in xclass_props - confirm and
update this comment. */

#ifdef SUPPORT_WIDE_CHARS  /* Defined for 16/32 bits, or 8-bit with Unicode */
if ((xclass_props & XCLASS_REQUIRED) != 0)
  {
  PCRE2_UCHAR *previous = code;

  if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0)
    *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
  *code++ = OP_XCLASS;
  code += LINK_SIZE;
  *code = negate_class? XCL_NOT:0;
  if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;

  /* If the map is required, move up the extra data to make room for it;
  otherwise just move the code pointer to the end of the extra data. */

  if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL)
    {
    if (negate_class)
      {
      uint32_t *classwords = cb->classbits.classwords;
      for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
      }

    if (has_bitmap == NULL)
      {
      *code++ |= XCL_MAP;
      (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
        CU2BYTES(class_uchardata - code));
      memcpy(code, classbits, 32);
      code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
      }
    else
      {
      /* The bitmap is not written to the code; it stays in cb->classbits. */
      code = class_uchardata;
      if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
        *has_bitmap = TRUE;
      }
    }
  else code = class_uchardata;

  if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
    {
    /* Char lists size is an even number, because all items are 16 or 32
    bit values. The character list data is always aligned to 32 bits. */
    size_t char_lists_size = cranges->char_lists_size;
    PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
                 (cb->char_lists_size & 0x3) == 0);

    if (lengthptr != NULL)
      {
      char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

#if PCRE2_CODE_UNIT_WIDTH == 8
      *lengthptr += 2 + LINK_SIZE;
#else
      *lengthptr += 1 + LINK_SIZE;
#endif

      cb->char_lists_size += char_lists_size;

      char_lists_size /= sizeof(PCRE2_UCHAR);

      /* Storage space for character lists is included
      in the maximum pattern size. */
      if (*lengthptr > MAX_PATTERN_SIZE ||
          MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
        {
        *errorcodeptr = ERR20;   /* Pattern is too large */
        return NULL;
        }
      }
    else
      {
      uint8_t *data;

      PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
#if PCRE2_CODE_UNIT_WIDTH == 8
      /* Encode as high / low bytes. */
      code[0] = (uint8_t)(XCL_LIST |
        (cranges->char_lists_types >> 8));
      code[1] = (uint8_t)cranges->char_lists_types;
      code += 2;
#else
      *code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
#endif

      /* Character lists are stored in backwards direction from
      byte code start. The non-dfa/dfa matchers can access these
      lists using the byte code start stored in match blocks.
      Each list is aligned to 32 bit with an optional unused
      16 bit value at the beginning of the character list. */

      cb->char_lists_size += char_lists_size;
      data = (uint8_t*)cb->start_code - cb->char_lists_size;

      memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
        char_lists_size);

      /* Since character lists total size is less than MAX_PATTERN_SIZE,
      their starting offset fits into a value which size is LINK_SIZE. */

      char_lists_size = cb->char_lists_size;
      PUT(code, 0, (uint32_t)(char_lists_size >> 1));
      code += LINK_SIZE;

#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
      if ((char_lists_size & 0x2) != 0)
        {
        /* In debug the unused 16 bit value is set
        to a fixed value and marked unused. */
        ((uint16_t*)data)[-1] = 0x5555;
#ifdef SUPPORT_VALGRIND
        VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
#endif
        }
#endif

      /* Round the running total up so the next list starts 32-bit aligned. */
      cb->char_lists_size =
        CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

      cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
      }
    }

  /* Now fill in the complete length of the item */

  PUT(previous, 1, (int)(code - previous));
  goto DONE;   /* End of class handling */
  }
#endif /* SUPPORT_WIDE_CHARS */

/* If there are no characters > 255, or they are all to be included or
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
whole class was negated and whether there were negative specials such as \S
(non-UCP) in the class. Then copy the 32-byte map into the code vector,
negating it if necessary. */

if (negate_class)
  {
  uint32_t *classwords = cb->classbits.classwords;

  for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
  }

/* A map with every bit set can become OP_ALLANY. The first word is tested
in the condition as a cheap filter before all eight words are scanned. */

if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) &&
    cb->classbits.classwords[0] == ~(uint32_t)0)
  {
  const uint32_t *classwords = cb->classbits.classwords;
  int i;

  for (i = 0; i < 8; i++)
    if (classwords[i] != ~(uint32_t)0) break;

  if (i == 8)
    {
    *code++ = OP_ALLANY;
    goto DONE;   /* End of class handling */
    }
  }

*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
memcpy(code, classbits, 32);
code += 32 / sizeof(PCRE2_UCHAR);

DONE:
*pcode = code;
/* pptr has been advanced past the item that ended the class; step back. */
return pptr - 1;
}
*/ + +static void +fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr, + BOOL preserve_classbits) +{ +/* If the chunk of stack code is already composed of multiple ops, we won't +descend in and try and propagate the negation down the tree. (That would lead +to O(n^2) compile-time, which could be exploitable with a malicious regex - +although maybe that's not really too much of a worry in a library that offers +an exponential-time matching function!) */ + +if (pop_info->op_single_type == 0) + { + if (lengthptr != NULL) + *lengthptr += 1; + else + pop_info->code_start[pop_info->length] = ECL_NOT; + pop_info->length += 1; + } + +/* Otherwise, it's a nice single-op item, so we can easily fold in the negation +without needing to produce an ECL_NOT. */ + +else if (pop_info->op_single_type == ECL_ANY || + pop_info->op_single_type == ECL_NONE) + { + pop_info->op_single_type = (pop_info->op_single_type == ECL_NONE)? + ECL_ANY : ECL_NONE; + if (lengthptr == NULL) + *(pop_info->code_start) = pop_info->op_single_type; + } +else + { + PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS && + pop_info->length >= 1 + LINK_SIZE + 1); + if (lengthptr == NULL) + pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT; + } + +if (!preserve_classbits) + { + for (int i = 0; i < 8; i++) + pop_info->bits.classwords[i] = ~pop_info->bits.classwords[i]; + } +} + + + +/* This function folds together two operands using a binary operator. +The new, combined chunk of stack code is written out to *lhs_op_info. 
*/ + +static void +fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info, + PCRE2_SIZE *lengthptr) +{ +switch (op) + { + /* ECL_AND truth table: + + LHS RHS RESULT + ---------------- + ANY * RHS + * ANY LHS + NONE * NONE + * NONE NONE + X Y X & Y + */ + + case ECL_AND: + if (rhs_op_info->op_single_type == ECL_ANY) + { + /* no-op: drop the RHS */ + } + else if (lhs_op_info->op_single_type == ECL_ANY) + { + /* no-op: drop the LHS, and memmove the RHS into its place */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + } + else if (rhs_op_info->op_single_type == ECL_NONE) + { + /* the result is ECL_NONE: write into the LHS */ + if (lengthptr == NULL) + lhs_op_info->code_start[0] = ECL_NONE; + lhs_op_info->length = 1; + lhs_op_info->op_single_type = ECL_NONE; + } + else if (lhs_op_info->op_single_type == ECL_NONE) + { + /* the result is ECL_NONE: drop the RHS */ + } + else + { + /* Both of LHS & RHS are either ECL_XCLASS, or compound operations. 
*/ + if (lengthptr != NULL) + *lengthptr += 1; + else + { + PCRE2_ASSERT(rhs_op_info->code_start == + lhs_op_info->code_start + lhs_op_info->length); + rhs_op_info->code_start[rhs_op_info->length] = ECL_AND; + } + lhs_op_info->length += rhs_op_info->length + 1; + lhs_op_info->op_single_type = 0; + } + + for (int i = 0; i < 8; i++) + lhs_op_info->bits.classwords[i] &= rhs_op_info->bits.classwords[i]; + break; + + /* ECL_OR truth table: + + LHS RHS RESULT + ---------------- + ANY * ANY + * ANY ANY + NONE * RHS + * NONE LHS + X Y X | Y + */ + + case ECL_OR: + if (rhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the RHS */ + } + else if (lhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the LHS, and memmove the RHS into its place */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + } + else if (rhs_op_info->op_single_type == ECL_ANY) + { + /* the result is ECL_ANY: write into the LHS */ + if (lengthptr == NULL) + lhs_op_info->code_start[0] = ECL_ANY; + lhs_op_info->length = 1; + lhs_op_info->op_single_type = ECL_ANY; + } + else if (lhs_op_info->op_single_type == ECL_ANY) + { + /* the result is ECL_ANY: drop the RHS */ + } + else + { + /* Both of LHS & RHS are either ECL_XCLASS, or compound operations. 
*/ + if (lengthptr != NULL) + *lengthptr += 1; + else + { + PCRE2_ASSERT(rhs_op_info->code_start == + lhs_op_info->code_start + lhs_op_info->length); + rhs_op_info->code_start[rhs_op_info->length] = ECL_OR; + } + lhs_op_info->length += rhs_op_info->length + 1; + lhs_op_info->op_single_type = 0; + } + + for (int i = 0; i < 8; i++) + lhs_op_info->bits.classwords[i] |= rhs_op_info->bits.classwords[i]; + break; + + /* ECL_XOR truth table: + + LHS RHS RESULT + ---------------- + ANY * !RHS + * ANY !LHS + NONE * RHS + * NONE LHS + X Y X ^ Y + */ + + case ECL_XOR: + if (rhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the RHS */ + } + else if (lhs_op_info->op_single_type == ECL_NONE) + { + /* no-op: drop the LHS, and memmove the RHS into its place */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + } + else if (rhs_op_info->op_single_type == ECL_ANY) + { + /* the result is !LHS: fold in the negation, and drop the RHS */ + /* Preserve the classbits, because we promise to deal with them later. */ + fold_negation(lhs_op_info, lengthptr, TRUE); + } + else if (lhs_op_info->op_single_type == ECL_ANY) + { + /* the result is !RHS: drop the LHS, memmove the RHS into its place, and + fold in the negation */ + if (lengthptr == NULL) + memmove(lhs_op_info->code_start, rhs_op_info->code_start, + CU2BYTES(rhs_op_info->length)); + lhs_op_info->length = rhs_op_info->length; + lhs_op_info->op_single_type = rhs_op_info->op_single_type; + + /* Preserve the classbits, because we promise to deal with them later. */ + fold_negation(lhs_op_info, lengthptr, TRUE); + } + else + { + /* Both of LHS & RHS are either ECL_XCLASS, or compound operations. 
*/ + if (lengthptr != NULL) + *lengthptr += 1; + else + { + PCRE2_ASSERT(rhs_op_info->code_start == + lhs_op_info->code_start + lhs_op_info->length); + rhs_op_info->code_start[rhs_op_info->length] = ECL_XOR; + } + lhs_op_info->length += rhs_op_info->length + 1; + lhs_op_info->op_single_type = 0; + } + + for (int i = 0; i < 8; i++) + lhs_op_info->bits.classwords[i] ^= rhs_op_info->bits.classwords[i]; + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + break; + } +} + + + +static BOOL +compile_eclass_nested(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, + eclass_op_info *pop_info, PCRE2_SIZE *lengthptr); + +/* This function consumes a group of implicitly-unioned class elements. +These can be characters, ranges, properties, or nested classes, as long +as they are all joined by being placed adjacently. */ + +static BOOL +compile_class_operand(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +uint32_t *prev_ptr; +PCRE2_UCHAR *code = *pcode; +PCRE2_UCHAR *code_start = code; +PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0; +PCRE2_SIZE extra_length; +uint32_t meta = META_CODE(*ptr); + +switch (meta) + { + case META_CLASS_EMPTY_NOT: + case META_CLASS_EMPTY: + ++ptr; + pop_info->length = 1; + if ((meta == META_CLASS_EMPTY) == negated) + { + *code++ = pop_info->op_single_type = ECL_ANY; + memset(pop_info->bits.classbits, 0xff, 32); + } + else + { + *code++ = pop_info->op_single_type = ECL_NONE; + memset(pop_info->bits.classbits, 0, 32); + } + break; + + case META_CLASS: + case META_CLASS_NOT: + if ((*ptr & CLASS_IS_ECLASS) != 0) + { + if (!compile_eclass_nested(context, negated, &ptr, &code, + pop_info, lengthptr)) + return FALSE; + + PCRE2_ASSERT(*ptr == META_CLASS_END); + ptr++; + goto DONE; + } + + ptr++; + /* Fall through */ + + default: + /* Scan forward characters, ranges, and properties. 
+ For example: inside [a-z_ -- m] we don't have brackets around "a-z_" but + we still need to collect that fragment up into a "leaf" OP_CLASS. */ + + prev_ptr = ptr; + ptr = PRIV(compile_class_not_nested)( + context->options, context->xoptions, ptr, &code, + (meta != META_CLASS_NOT) == negated, &context->needs_bitmap, + context->errorcodeptr, context->cb, lengthptr); + if (ptr == NULL) return FALSE; + + /* We must have a 100% guarantee that ptr increases when + compile_class_operand() returns, even on Release builds, so that we can + statically prove our loops terminate. */ + if (ptr <= prev_ptr) + { + PCRE2_DEBUG_UNREACHABLE(); + return FALSE; + } + + /* If we fell through above, consume the closing ']'. */ + if (meta == META_CLASS || meta == META_CLASS_NOT) + { + PCRE2_ASSERT(*ptr == META_CLASS_END); + ptr++; + } + + /* Regardless of whether (lengthptr == NULL), some data will still be written + out to *pcode, which we need: we have to peek at it, to transform the opcode + into the ECLASS version (since we need to hoist up the bitmaps). */ + PCRE2_ASSERT(code > code_start); + extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0; + + /* Easiest case: convert OP_ALLANY to ECL_ANY */ + + if (*code_start == OP_ALLANY) + { + PCRE2_ASSERT(code - code_start == 1 && extra_length == 0); + pop_info->length = 1; + *code_start = pop_info->op_single_type = ECL_ANY; + memset(pop_info->bits.classbits, 0xff, 32); + } + + /* For OP_CLASS and OP_NCLASS, we hoist out the bitmap and convert to + ECL_NONE / ECL_ANY respectively. */ + + else if (*code_start == OP_CLASS || *code_start == OP_NCLASS) + { + PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) && + extra_length == 0); + pop_info->length = 1; + *code_start = pop_info->op_single_type = + (*code_start == OP_CLASS)? 
ECL_NONE : ECL_ANY; + memcpy(pop_info->bits.classbits, code_start + 1, 32); + /* Rewind the code pointer, but make sure we adjust *lengthptr, because we + do need to reserve that space (even though we only use it temporarily). */ + if (lengthptr != NULL) + *lengthptr += code - (code_start + 1); + code = code_start + 1; + + if (!context->needs_bitmap && *code_start == ECL_NONE) + { + uint32_t *classwords = pop_info->bits.classwords; + + for (int i = 0; i < 8; i++) + if (classwords[i] != 0) + { + context->needs_bitmap = TRUE; + break; + } + } + else + context->needs_bitmap = TRUE; + } + + /* Finally, for OP_XCLASS we hoist out the bitmap (if any), and convert to + ECL_XCLASS. */ + + else + { + PCRE2_ASSERT(*code_start == OP_XCLASS); + *code_start = pop_info->op_single_type = ECL_XCLASS; + + PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1); + + memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32); + pop_info->length = (code - code_start) + extra_length; + } + + break; + } /* End of switch(meta) */ + +pop_info->code_start = (lengthptr == NULL)? code_start : NULL; + +if (lengthptr != NULL) + { + *lengthptr += code - code_start; + code = code_start; + } + +DONE: +PCRE2_ASSERT(lengthptr == NULL || (code == code_start)); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function consumes a group of implicitly-unioned class elements. +These can be characters, ranges, properties, or nested classes, as long +as they are all joined by being placed adjacently. */ + +static BOOL +compile_class_juxtaposition(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +PCRE2_UCHAR *code = *pcode; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* See compile_class_binary_loose() for comments on compile-time folding of +the "negated" flag. */ + +/* Because it's a non-empty class, there must be an operand at the start. 
*/ +if (!compile_class_operand(context, negated, &ptr, &code, pop_info, lengthptr)) + return FALSE; + +while (*ptr != META_CLASS_END && + !(*ptr >= META_ECLASS_AND && *ptr <= META_ECLASS_NOT)) + { + uint32_t op; + BOOL rhs_negated; + eclass_op_info rhs_op_info; + + if (negated) + { + /* !(A juxtapose B) -> !A && !B */ + op = ECL_AND; + rhs_negated = TRUE; + } + else + { + /* A juxtapose B -> A || B */ + op = ECL_OR; + rhs_negated = FALSE; + } + + /* An operand must follow the operator. */ + if (!compile_class_operand(context, rhs_negated, &ptr, &code, + &rhs_op_info, lengthptr)) + return FALSE; + + /* Convert infix to postfix (RPN). */ + fold_binary(op, pop_info, &rhs_op_info, lengthptr); + if (lengthptr == NULL) + code = pop_info->code_start + pop_info->length; + } + +PCRE2_ASSERT(lengthptr == NULL || code == start_code); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function consumes unary prefix operators. */ + +static BOOL +compile_class_unary(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +while (*ptr == META_ECLASS_NOT) + { + ++ptr; + negated = !negated; + } + +*pptr = ptr; +/* Because it's a non-empty class, there must be an operand. */ +if (!compile_class_juxtaposition(context, negated, pptr, pcode, + pop_info, lengthptr)) + return FALSE; + +PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code); +return TRUE; +} + + + +/* This function consumes tightly-binding binary operators. */ + +static BOOL +compile_class_binary_tight(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +PCRE2_UCHAR *code = *pcode; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* See compile_class_binary_loose() for comments on compile-time folding of +the "negated" flag. 
*/ + +/* Because it's a non-empty class, there must be an operand at the start. */ +if (!compile_class_unary(context, negated, &ptr, &code, pop_info, lengthptr)) + return FALSE; + +while (*ptr == META_ECLASS_AND) + { + uint32_t op; + BOOL rhs_negated; + eclass_op_info rhs_op_info; + + if (negated) + { + /* !(A && B) -> !A || !B */ + op = ECL_OR; + rhs_negated = TRUE; + } + else + { + /* A && B -> A && B */ + op = ECL_AND; + rhs_negated = FALSE; + } + + ++ptr; + + /* An operand must follow the operator. */ + if (!compile_class_unary(context, rhs_negated, &ptr, &code, + &rhs_op_info, lengthptr)) + return FALSE; + + /* Convert infix to postfix (RPN). */ + fold_binary(op, pop_info, &rhs_op_info, lengthptr); + if (lengthptr == NULL) + code = pop_info->code_start + pop_info->length; + } + +PCRE2_ASSERT(lengthptr == NULL || code == start_code); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function consumes loosely-binding binary operators. */ + +static BOOL +compile_class_binary_loose(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info, + PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +PCRE2_UCHAR *code = *pcode; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* We really want to fold the negation operator, if at all possible, so that +simple cases can be reduced down. In particular, in 8-bit no-UTF mode, we want +to produce a fully-folded expression, so that we can guarantee not to emit any +OP_ECLASS codes (in the same way that we never emit OP_XCLASS in this mode). + +This has the consequence that with a little ingenuity, we can in fact avoid +emitting (nearly...) all cases of the "NOT" operator. Imagine that we have: + !(A ... +We have parsed the preceding "!", and we are about to parse the "A" operand. We +don't know yet whether there will even be a following binary operand! 
Both of +these are possibilities for what follows: + !(A && B) + !(A) +However, we can still fold the "!" into the "A" operand, because no matter what +the following binary operator will be, we can produce an expression which is +equivalent. */ + +/* Because it's a non-empty class, there must be an operand at the start. */ +if (!compile_class_binary_tight(context, negated, &ptr, &code, + pop_info, lengthptr)) + return FALSE; + +while (*ptr >= META_ECLASS_OR && *ptr <= META_ECLASS_XOR) + { + uint32_t op; + BOOL op_neg; + BOOL rhs_negated; + eclass_op_info rhs_op_info; + + if (negated) + { + /* The whole expression is being negated; we respond by unconditionally + negating the LHS A, before seeing what follows. And hooray! We can recover, + no matter what follows. */ + /* !(A || B) -> !A && !B */ + /* !(A -- B) -> !(A && !B) -> !A || B */ + /* !(A XOR B) -> !(!A XOR !B) -> !A XNOR !B */ + op = (*ptr == META_ECLASS_OR )? ECL_AND : + (*ptr == META_ECLASS_SUB)? ECL_OR : + /*ptr == META_ECLASS_XOR*/ ECL_XOR; + op_neg = (*ptr == META_ECLASS_XOR); + rhs_negated = *ptr != META_ECLASS_SUB; + } + else + { + /* A || B -> A || B */ + /* A -- B -> A && !B */ + /* A XOR B -> A XOR B */ + op = (*ptr == META_ECLASS_OR )? ECL_OR : + (*ptr == META_ECLASS_SUB)? ECL_AND : + /*ptr == META_ECLASS_XOR*/ ECL_XOR; + op_neg = FALSE; + rhs_negated = *ptr == META_ECLASS_SUB; + } + + ++ptr; + + /* An operand must follow the operator. */ + if (!compile_class_binary_tight(context, rhs_negated, &ptr, &code, + &rhs_op_info, lengthptr)) + return FALSE; + + /* Convert infix to postfix (RPN). */ + fold_binary(op, pop_info, &rhs_op_info, lengthptr); + if (op_neg) fold_negation(pop_info, lengthptr, FALSE); + if (lengthptr == NULL) + code = pop_info->code_start + pop_info->length; + } + +PCRE2_ASSERT(lengthptr == NULL || code == start_code); + +*pptr = ptr; +*pcode = code; +return TRUE; +} + + + +/* This function converts the META codes in pptr into opcodes written to +pcode. 
The pptr must start at a META_CLASS or META_CLASS_NOT. + +The class is compiled as a left-associative sequence of operator +applications. + +The pptr will be left pointing at the matching META_CLASS_END. */ + +static BOOL +compile_eclass_nested(eclass_context *context, BOOL negated, + uint32_t **pptr, PCRE2_UCHAR **pcode, + eclass_op_info *pop_info, PCRE2_SIZE *lengthptr) +{ +uint32_t *ptr = *pptr; +#ifdef PCRE2_DEBUG +PCRE2_UCHAR *start_code = *pcode; +#endif + +/* The CLASS_IS_ECLASS bit must be set since it is a nested class. */ +PCRE2_ASSERT(*ptr == (META_CLASS | CLASS_IS_ECLASS) || + *ptr == (META_CLASS_NOT | CLASS_IS_ECLASS)); + +if (*ptr++ == (META_CLASS_NOT | CLASS_IS_ECLASS)) + negated = !negated; + +(*pptr)++; + +/* Because it's a non-empty class, there must be an operand at the start. */ +if (!compile_class_binary_loose(context, negated, pptr, pcode, + pop_info, lengthptr)) + return FALSE; + +PCRE2_ASSERT(**pptr == META_CLASS_END); +PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code); +return TRUE; +} + +BOOL +PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions, + uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr, + compile_block *cb, PCRE2_SIZE *lengthptr) +{ +eclass_context context; +eclass_op_info op_info; +PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0; +PCRE2_UCHAR *code = *pcode; +PCRE2_UCHAR *previous; +BOOL allbitsone = TRUE; + +context.needs_bitmap = FALSE; +context.options = options; +context.xoptions = xoptions; +context.errorcodeptr = errorcodeptr; +context.cb = cb; + +previous = code; +*code++ = OP_ECLASS; +code += LINK_SIZE; +*code++ = 0; /* Flags, currently zero. 
*/ +if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr)) + return FALSE; + +if (lengthptr != NULL) + { + *lengthptr += code - previous; + code = previous; + /* (*lengthptr - previous_length) now holds the amount of buffer that + we require to make the call to compile_class_nested() with + lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out + before that call. */ + } + +/* Do some useful counting of what's in the bitmap. */ +for (int i = 0; i < 8; i++) + if (op_info.bits.classwords[i] != 0xffffffff) + { + allbitsone = FALSE; + break; + } + +/* After constant-folding the extended class syntax, it may turn out to be +a simple class after all. In that case, we can unwrap it from the +OP_ECLASS container - and in fact, we must do so, because in 8-bit +no-Unicode mode the matcher is compiled without support for OP_ECLASS. */ + +#ifndef SUPPORT_WIDE_CHARS +PCRE2_ASSERT(op_info.op_single_type != 0); +#else +if (op_info.op_single_type != 0) +#endif + { + /* Rewind back over the OP_ECLASS. */ + code = previous; + + /* If the bits are all ones, and the "high characters" are all matched + too, we use a special-cased encoding of OP_ALLANY. */ + + if (op_info.op_single_type == ECL_ANY && allbitsone) + { + /* Advancing code means rewinding lengthptr, at this point. */ + if (lengthptr != NULL) *lengthptr -= 1; + *code++ = OP_ALLANY; + } + + /* If the high bits are all matched / all not-matched, then we emit an + OP_NCLASS/OP_CLASS respectively. */ + + else if (op_info.op_single_type == ECL_ANY || + op_info.op_single_type == ECL_NONE) + { + PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR)); + + if (lengthptr != NULL) + { + if (required_len > (*lengthptr - previous_length)) + *lengthptr = previous_length + required_len; + } + + /* Advancing code means rewinding lengthptr, at this point. */ + if (lengthptr != NULL) *lengthptr -= required_len; + *code++ = (op_info.op_single_type == ECL_ANY)? 
OP_NCLASS : OP_CLASS; + memcpy(code, op_info.bits.classbits, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + + /* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data + there, but, we pulled out its bitmap into op_info, so now we have to + put that back into the OP_XCLASS. */ + + else + { +#ifndef SUPPORT_WIDE_CHARS + PCRE2_DEBUG_UNREACHABLE(); +#else + BOOL need_map = context.needs_bitmap; + PCRE2_SIZE required_len; + + PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS); + required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0); + + if (lengthptr != NULL) + { + /* Don't unconditionally request all the space we need - we may + already have asked for more during processing of the ECLASS. */ + if (required_len > (*lengthptr - previous_length)) + *lengthptr = previous_length + required_len; + + /* The code we write out here won't be ignored, even during the + (lengthptr != NULL) phase, because if there's a following quantifier + it will peek backwards. So we do have to write out a (truncated) + OP_XCLASS, even on this branch. */ + *lengthptr -= 1 + LINK_SIZE + 1; + *code++ = OP_XCLASS; + PUT(code, 0, 1 + LINK_SIZE + 1); + code += LINK_SIZE; + *code++ = 0; + } + else + { + PCRE2_UCHAR *rest; + PCRE2_SIZE rest_len; + PCRE2_UCHAR flags; + + /* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */ + PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1); + rest = op_info.code_start + 1 + LINK_SIZE + 1; + rest_len = (op_info.code_start + op_info.length) - rest; + + /* First read any data we use, before memmove splats it. */ + flags = op_info.code_start[1 + LINK_SIZE]; + PCRE2_ASSERT((flags & XCL_MAP) == 0); + + /* Next do the memmove before any writes. */ + memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0), + rest, CU2BYTES(rest_len)); + + /* Finally write the header data. */ + *code++ = OP_XCLASS; + PUT(code, 0, (int)required_len); + code += LINK_SIZE; + *code++ = flags | (need_map? 
XCL_MAP : 0); + if (need_map) + { + memcpy(code, op_info.bits.classbits, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + code += rest_len; + } +#endif /* SUPPORT_WIDE_CHARS */ + } + } + +/* Otherwise, we're going to keep the OP_ECLASS. However, again we need +to do some adjustment to insert the bitmap if we have one. */ + +#ifdef SUPPORT_WIDE_CHARS +else + { + BOOL need_map = context.needs_bitmap; + PCRE2_SIZE required_len = + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length; + + if (lengthptr != NULL) + { + if (required_len > (*lengthptr - previous_length)) + *lengthptr = previous_length + required_len; + + /* As for the XCLASS branch above, we do have to write out a dummy + OP_ECLASS, because of the backwards peek by the quantifier code. Write + out a (truncated) OP_ECLASS, even on this branch. */ + *lengthptr -= 1 + LINK_SIZE + 1; + *code++ = OP_ECLASS; + PUT(code, 0, 1 + LINK_SIZE + 1); + code += LINK_SIZE; + *code++ = 0; + } + else + { + if (need_map) + { + PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1; + previous[1 + LINK_SIZE] |= ECL_MAP; + memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start, + CU2BYTES(code - map_start)); + memcpy(map_start, op_info.bits.classbits, 32); + code += 32 / sizeof(PCRE2_UCHAR); + } + PUT(previous, 1, (int)(code - previous)); + } + } +#endif /* SUPPORT_WIDE_CHARS */ + +*pcode = code; +return TRUE; +} + +/* End of pcre2_compile_class.c */ diff --git a/src/pcre2_config.c b/src/pcre2_config.c index 5ef103c..031981b 100644 --- a/src/pcre2_config.c +++ b/src/pcre2_config.c @@ -224,8 +224,8 @@ switch (what) XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted. There are problems using an "obvious" approach like this: - XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR) - XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE) + XSTRING(PCRE2_MAJOR) "." 
XSTRING(PCRE2_MINOR) + XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE) because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion of STRING(). The C standard states: "If (before argument substitution) any diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 9edbd1b..2345145 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -130,7 +130,7 @@ return gcontext; /* A default compile context is set up to save having to initialize at run time when no context is supplied to the compile function. */ -const pcre2_compile_context PRIV(default_compile_context) = { +pcre2_compile_context PRIV(default_compile_context) = { { default_malloc, default_free, NULL }, /* Default memory handling */ NULL, /* Stack guard */ NULL, /* Stack guard data */ @@ -141,7 +141,8 @@ const pcre2_compile_context PRIV(default_compile_context) = { NEWLINE_DEFAULT, /* Newline convention */ PARENS_NEST_LIMIT, /* As it says */ 0, /* Extra options */ - MAX_VARLOOKBEHIND /* As it says */ + MAX_VARLOOKBEHIND, /* As it says */ + PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */ }; /* The create function copies the default into the new memory, but must @@ -163,7 +164,7 @@ return ccontext; /* A default match context is set up to save having to initialize at run time when no context is supplied to a match function. 
*/ -const pcre2_match_context PRIV(default_match_context) = { +pcre2_match_context PRIV(default_match_context) = { { default_malloc, default_free, NULL }, #ifdef SUPPORT_JIT NULL, /* JIT callback */ @@ -173,6 +174,8 @@ const pcre2_match_context PRIV(default_match_context) = { NULL, /* Callout data */ NULL, /* Substitute callout function */ NULL, /* Substitute callout data */ + NULL, /* Substitute case callout function */ + NULL, /* Substitute case callout data */ PCRE2_UNSET, /* Offset limit */ HEAP_LIMIT, MATCH_LIMIT, @@ -197,7 +200,7 @@ return mcontext; /* A default convert context is set up to save having to initialize at run time when no context is supplied to the convert function. */ -const pcre2_convert_context PRIV(default_convert_context) = { +pcre2_convert_context PRIV(default_convert_context) = { { default_malloc, default_free, NULL }, /* Default memory handling */ #ifdef _WIN32 CHAR_BACKSLASH, /* Default path separator */ @@ -409,6 +412,38 @@ ccontext->stack_guard_data = user_data; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive) +{ +if (ccontext == NULL) + return PCRE2_ERROR_NULL; + +switch (directive) + { + case PCRE2_OPTIMIZATION_NONE: + ccontext->optimization_flags = 0; + break; + + case PCRE2_OPTIMIZATION_FULL: + ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL; + break; + + default: + if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF) + { + /* Even directive numbers starting from 64 switch a bit on; + * Odd directive numbers starting from 65 switch a bit off */ + if ((directive & 1) != 0) + ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32)); + else + ccontext->optimization_flags |= 1u << ((directive >> 1) - 32); + return 0; + } + return PCRE2_ERROR_BADOPTION; + } + +return 0; +} /* ------------ Match context ------------ */ @@ -424,13 +459,24 @@ return 0; PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 
pcre2_set_substitute_callout(pcre2_match_context *mcontext, int (*substitute_callout)(pcre2_substitute_callout_block *, void *), - void *substitute_callout_data) + void *substitute_callout_data) { mcontext->substitute_callout = substitute_callout; mcontext->substitute_callout_data = substitute_callout_data; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *), + void *substitute_case_callout_data) +{ +mcontext->substitute_case_callout = substitute_case_callout; +mcontext->substitute_case_callout_data = substitute_case_callout_data; +return 0; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit) { diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c index fe396ae..d2b238c 100644 --- a/src/pcre2_convert.c +++ b/src/pcre2_convert.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -74,7 +74,7 @@ enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, #define PUTCHARS(string) \ { \ - for (s = (char *)(string); *s != 0; s++) \ + for (const char *s = string; *s != 0; s++) \ { \ if (p >= endp) return PCRE2_ERROR_NOMEMORY; \ *p++ = *s; \ @@ -125,7 +125,6 @@ convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength, BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) { -char *s; PCRE2_SPTR posix = pattern; PCRE2_UCHAR *p = use_buffer; PCRE2_UCHAR *pp = p; @@ -1065,7 +1064,7 @@ pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options, PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr, pcre2_convert_context *ccontext) { -int i, rc; +int rc; PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE]; PCRE2_UCHAR *use_buffer = dummy_buffer; PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE; @@ -1119,7 +1118,7 @@ if (buffptr != NULL && *buffptr != NULL) /* Call an individual converter, either just once (if a buffer was provided or just the length is needed), or twice (if a memory allocation is required). */ -for (i = 0; i < 2; i++) +for (int i = 0; i < 2; i++) { PCRE2_UCHAR *allocated; BOOL dummyrun = buffptr == NULL || *buffptr == NULL; @@ -1138,8 +1137,7 @@ for (i = 0; i < 2; i++) break; default: - *bufflenptr = 0; /* Error offset */ - return PCRE2_ERROR_INTERNAL; + goto EXIT; } if (rc != 0 || /* Error */ @@ -1159,8 +1157,12 @@ for (i = 0; i < 2; i++) use_length = *bufflenptr + 1; } -/* Control should never get here. */ +/* Something went terribly wrong. 
Trigger an assert and return an error */ +PCRE2_DEBUG_UNREACHABLE(); +EXIT: + +*bufflenptr = 0; /* Error offset */ return PCRE2_ERROR_INTERNAL; } diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index caae652..ebf31d2 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -156,6 +156,7 @@ static const uint8_t coptable[] = { 0, /* CLASS */ 0, /* NCLASS */ 0, /* XCLASS - variable length */ + 0, /* ECLASS - variable length */ 0, /* REF */ 0, /* REFI */ 0, /* DNREF */ @@ -175,6 +176,7 @@ static const uint8_t coptable[] = { 0, /* Assert behind not */ 0, /* NA assert */ 0, /* NA assert behind */ + 0, /* Assert scan substring */ 0, /* ONCE */ 0, /* SCRIPT_RUN */ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ @@ -188,7 +190,7 @@ static const uint8_t coptable[] = { 0, 0, /* COMMIT, COMMIT_ARG */ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ - 0, 0 /* \B and \b in UCP mode */ + 0, 0, /* \B and \b in UCP mode */ }; /* This table identifies those opcodes that inspect a character. 
It is used to @@ -234,6 +236,7 @@ static const uint8_t poptable[] = { 1, /* CLASS */ 1, /* NCLASS */ 1, /* XCLASS - variable length */ + 1, /* ECLASS - variable length */ 0, /* REF */ 0, /* REFI */ 0, /* DNREF */ @@ -253,6 +256,7 @@ static const uint8_t poptable[] = { 0, /* Assert behind not */ 0, /* NA assert */ 0, /* NA assert behind */ + 0, /* Assert scan substring */ 0, /* ONCE */ 0, /* SCRIPT_RUN */ 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ @@ -266,9 +270,13 @@ static const uint8_t poptable[] = { 0, 0, /* COMMIT, COMMIT_ARG */ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ - 1, 1 /* \B and \b in UCP mode */ + 1, 1, /* \B and \b in UCP mode */ }; +/* Compile-time check that these tables have the correct size. */ +STATIC_ASSERT(sizeof(coptable) == OP_TABLE_LENGTH, coptable); +STATIC_ASSERT(sizeof(poptable) == OP_TABLE_LENGTH, poptable); + /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, and \w */ @@ -695,7 +703,6 @@ for (;;) int i, j; int clen, dlen; uint32_t c, d; - int forced_fail = 0; BOOL partial_newline = FALSE; BOOL could_continue = reset_could_continue; reset_could_continue = FALSE; @@ -841,19 +848,6 @@ for (;;) switch (codevalue) { -/* ========================================================================== */ - /* These cases are never obeyed. This is a fudge that causes a compile- - time error if the vectors coptable or poptable, which are indexed by - opcode, are not the correct length. It seems to be the only way to do - such a check at compile time, as the sizeof() operator does not work - in the C preprocessor. */ - - case OP_TABLE_LENGTH: - case OP_TABLE_LENGTH + - ((sizeof(coptable) == OP_TABLE_LENGTH) && - (sizeof(poptable) == OP_TABLE_LENGTH)): - return 0; - /* ========================================================================== */ /* Reached a closing bracket. If not at the end of the pattern, carry on with the next opcode. 
For repeating opcodes, also add the repeat @@ -1179,10 +1173,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[1]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || @@ -1462,10 +1452,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[2]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; @@ -1727,10 +1713,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[2]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; @@ -2017,10 +1999,6 @@ for (;;) const ucd_record * prop = GET_UCD(c); switch(code[1 + IMM2_SIZE + 1]) { - case PT_ANY: - OK = TRUE; - break; - case PT_LAMP: chartype = prop->chartype; OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; @@ -2663,35 +2641,54 @@ for (;;) case OP_CLASS: case OP_NCLASS: +#ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: +#endif { BOOL isinclass = FALSE; int next_state_offset; PCRE2_SPTR ecode; +#ifdef SUPPORT_WIDE_CHARS + /* An extended class may have a table or a list of single characters, + ranges, or both, and it may be positive or negative. There's a + function that sorts all this out. */ + + if (codevalue == OP_XCLASS) + { + ecode = code + GET(code, 1); + if (clen > 0) + isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, + (const uint8_t*)mb->start_code, utf); + } + + /* A nested set-based class has internal opcodes for performing + set operations. */ + + else if (codevalue == OP_ECLASS) + { + ecode = code + GET(code, 1); + if (clen > 0) + isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode, + (const uint8_t*)mb->start_code, utf); + } + + else +#endif /* SUPPORT_WIDE_CHARS */ + /* For a simple class, there is always just a 32-byte table, and we can set isinclass from it. 
*/ - if (codevalue != OP_XCLASS) { ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR)); if (clen > 0) { isinclass = (c > 255)? (codevalue == OP_NCLASS) : - ((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0); + ((((const uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0); } } - /* An extended class may have a table or a list of single characters, - ranges, or both, and it may be positive or negative. There's a - function that sorts all this out. */ - - else - { - ecode = code + GET(code, 1); - if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf); - } - /* At this point, isinclass is set for all kinds of class, and ecode points to the byte after the end of the class. If there is a quantifier, this is where it will be. */ @@ -2784,7 +2781,6 @@ for (;;) though the other "backtracking verbs" are not supported. */ case OP_FAIL: - forced_fail++; /* Count FAILs for multiple states */ break; case OP_ASSERT: @@ -3058,7 +3054,7 @@ for (;;) if (codevalue == OP_BRAPOSZERO) { allow_zero = TRUE; - codevalue = *(++code); /* Codevalue will be one of above BRAs */ + ++code; /* The following opcode will be one of the above BRAs */ } else allow_zero = FALSE; @@ -3271,18 +3267,12 @@ for (;;) matches that we are going to find. If partial matching has been requested, check for appropriate conditions. - The "forced_ fail" variable counts the number of (*F) encountered for the - character. If it is equal to the original active_count (saved in - workspace[1]) it means that (*F) was found on every active state. In this - case we don't want to give a partial match. - The "could_continue" variable is true if a state could have continued but for the fact that the end of the subject was reached. */ if (new_count <= 0) { if (could_continue && /* Some could go on, and */ - forced_fail != workspace[1] && /* Not all forced fail & */ ( /* either... */ (mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */ || /* or... 
*/ @@ -3438,7 +3428,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the options variable for this function. Users of PCRE2 who are not calling the function directly would like to have a way of setting these flags, in the same -way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with +way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be transferred to the options for this function. The bits are guaranteed to be @@ -3528,8 +3518,7 @@ if (mb->match_limit_depth > re->limit_depth) if (mb->heap_limit > re->limit_heap) mb->heap_limit = re->limit_heap; -mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + - re->name_count * re->name_entry_size; +mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start); mb->tables = re->tables; mb->start_subject = subject; mb->end_subject = end_subject; @@ -3576,7 +3565,9 @@ switch(re->newline_convention) mb->nltype = NLTYPE_ANYCRLF; break; - default: return PCRE2_ERROR_INTERNAL; + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; } /* Check a UTF string for validity if required. For 8-bit and 16-bit strings, @@ -3705,7 +3696,7 @@ for (;;) these, for testing and for ensuring that all callouts do actually occur. The optimizations must also be avoided when restarting a DFA match. 
*/ - if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && + if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 && (options & PCRE2_DFA_RESTART) == 0) { /* If firstline is TRUE, the start of the match is constrained to the first diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 7fa997a..8b7423c 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -96,7 +96,7 @@ static const unsigned char compile_error_texts[] = "length of lookbehind assertion is not limited\0" "a relative value of zero is not allowed\0" "conditional subpattern contains more than two branches\0" - "assertion expected after (?( or (?(?C)\0" + "atomic assertion expected after (?( or (?(?C)\0" "digit expected after (?+ or (?-\0" /* 30 */ "unknown POSIX class name\0" @@ -161,7 +161,7 @@ static const unsigned char compile_error_texts[] = "using UCP is disabled by the application\0" "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "character code point value in \\u.... sequence is too large\0" - "digits missing in \\x{} or \\o{} or \\N{U+}\0" + "digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0" "syntax error or number too big in (?(VERSION condition\0" /* 80 */ "internal error: unknown opcode in auto_possessify()\0" @@ -185,11 +185,29 @@ static const unsigned char compile_error_texts[] = "(*alpha_assertion) not recognized\0" "script runs require Unicode support, which this version of PCRE2 does not have\0" "too many capturing groups (maximum 65535)\0" - "atomic assertion expected after (?( or (?(?C)\0" + "octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0" "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" /* 100 */ "branch too long in variable-length lookbehind assertion\0" "compiled pattern would be longer than the limit set by the application\0" + "octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0" + "using callouts is disabled by the application\0" + 
"PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0" + /* 105 */ + "PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0" + "PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0" + "extended character class nesting is too deep\0" + "invalid operator in extended character class\0" + "unexpected operator in extended character class (no preceding operand)\0" + /* 110 */ + "expected operand after operator in extended character class\0" + "square brackets needed to clarify operator precedence in extended character class\0" + "missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0" + "unexpected expression in extended character class (no preceding operator)\0" + "empty expression in extended character class\0" + /* 115 */ + "terminating ] with no following closing parenthesis in (?[...]\0" + "unexpected character in (?[...]) extended character class\0" ; /* Match-time and UTF error texts are in the same format. */ @@ -276,6 +294,10 @@ static const unsigned char match_error_texts[] = "internal error - duplicate substitution match\0" "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0" "INTERNAL ERROR: invalid substring offset\0" + "feature is not supported by the JIT compiler\0" + "error performing replacement case transformation\0" + /* 70 */ + "replacement too large (longer than PCRE2_SIZE)\0" ; @@ -318,7 +340,7 @@ else if (enumber < 0) /* Match or UTF error */ } else /* Invalid error number */ { - message = (unsigned char *)"\0"; /* Empty message list */ + message = (const unsigned char *)"\0"; /* Empty message list */ n = 1; } diff --git a/src/pcre2_extuni.c b/src/pcre2_extuni.c index 4ed9f00..91d839e 100644 --- a/src/pcre2_extuni.c +++ b/src/pcre2_extuni.c @@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE. /* This module contains an internal function that is used to match a Unicode extended grapheme sequence. It is used by both pcre2_match() and -pcre2_def_match(). 
However, it is called only when Unicode support is being +pcre2_dfa_match(). However, it is called only when Unicode support is being compiled. Nevertheless, we provide a dummy function when there is no Unicode support, because some compilers do not like functionless source files. */ diff --git a/src/pcre2_find_bracket.c b/src/pcre2_find_bracket.c index 1290c5e..486f453 100644 --- a/src/pcre2_find_bracket.c +++ b/src/pcre2_find_bracket.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -76,18 +76,19 @@ for (;;) if (c == OP_END) return NULL; /* XCLASS is used for classes that cannot be represented just by a bit map. - This includes negated single high-valued characters. CALLOUT_STR is used for - callouts with string arguments. In both cases the length in the table is + This includes negated single high-valued characters. ECLASS is used for + classes that use set operations internally. CALLOUT_STR is used for + callouts with string arguments. In each case the length in the table is zero; the actual length is stored in the compiled code. 
*/ - if (c == OP_XCLASS) code += GET(code, 1); - else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); + if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); /* Handle lookbehind */ else if (c == OP_REVERSE || c == OP_VREVERSE) { - if (number < 0) return (PCRE2_UCHAR *)code; + if (number < 0) return code; code += PRIV(OP_lengths)[c]; } @@ -97,7 +98,7 @@ for (;;) c == OP_CBRAPOS || c == OP_SCBRAPOS) { int n = (int)GET2(code, 1+LINK_SIZE); - if (n == number) return (PCRE2_UCHAR *)code; + if (n == number) return code; code += PRIV(OP_lengths)[c]; } diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index cd78435..fa2b51b 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -42,7 +42,8 @@ below that output them. */ #define ALLOWED_COMPILE_OPTIONS \ (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ - PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ + PCRE2_ALT_EXTENDED_CLASS|PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT| \ + PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \ PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \ PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ @@ -67,9 +68,10 @@ fprintf(stream, "Compile options %s%.8x =", (compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "", compile_options); -fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", +fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", + ((compile_options & PCRE2_ALT_EXTENDED_CLASS) != 0)? "alt_extended_class" : "", ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "", ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? 
" allow_empty_class" : "", ((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "", @@ -169,8 +171,8 @@ for (int index = 0; index < count; index++) static void describe_failure( const char *task, - const unsigned char *data, - size_t size, + const PCRE2_UCHAR *data, + PCRE2_SIZE size, uint32_t compile_options, uint32_t match_options, int errorcode, @@ -257,7 +259,7 @@ getrlimit(RLIMIT_STACK, &rlim); rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024; if (rlim.rlim_cur > rlim.rlim_max) { - fprintf(stderr, "Hard stack size limit is too small (needed 8MiB)!\n"); + fprintf(stderr, "Hard stack size limit is too small\n"); _exit(1); } rc = setrlimit(RLIMIT_STACK, &rlim); @@ -328,8 +330,8 @@ if (size > 3) continue; i++; /* Points to '{' */ - /* Loop for two values a quantifier. Offset i points to brace or comma at the - start of the loop.*/ + /* Loop for two values in a quantifier. Offset i points to brace or comma + at the start of the loop. */ for (int ii = 0; ii < 2; ii++) { @@ -337,7 +339,7 @@ if (size > 3) if (i >= size - 1) goto END_QSCAN; /* Can happen for , */ - /* Ignore leading spaces */ + /* Ignore leading spaces. */ while (wdata[i+1] == ' ' || wdata[i+1] == '\t') { @@ -345,7 +347,16 @@ if (size > 3) if (i >= size - 1) goto END_QSCAN; } - /* Scan for a number ending in brace or comma in the first iteration, + /* Ignore non-significant leading zeros. */ + + while (wdata[i+1] == '0' && i+2 < size && wdata[i+2] >= '0' && + wdata[i+2] <= '9') + { + i++; + if (i >= size - 1) goto END_QSCAN; + } + + /* Scan for a number ending in brace, or comma in the first iteration, optionally preceded by space. */ for (j = i + 1; j < size && j < i + 7; j++) @@ -358,18 +369,19 @@ if (size > 3) if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP; } if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break; + if (wdata[j] < '0' || wdata[j] > '9') { j--; /* Ensure this character is checked next. The */ goto OUTERLOOP; /* string might be (e.g.) 
"){9){234}" */ } - q = q * 10 + wdata[j] - '0'; + q = q * 10 + (wdata[j] - '0'); } if (j >= size) goto END_QSCAN; /* End of data */ - /* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which is - the maximum quantifier. Leave such numbers alone. */ + /* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which + is the maximum quantifier. Leave such numbers alone. */ if (j >= i + 7 || q > 65535) goto OUTERLOOP; @@ -700,8 +712,8 @@ with the interpreter. */ if (match_data != NULL) pcre2_match_data_free(match_data); #ifdef SUPPORT_JIT if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit); -free(newwdata); #endif +free(newwdata); if (match_context != NULL) pcre2_match_context_free(match_context); if (compile_context != NULL) pcre2_compile_context_free(compile_context); return 0; diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index e580818..6e0a5e0 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -88,6 +88,12 @@ typedef int BOOL; #define TRUE 1 #endif +/* Helper macro for static (compile-time) assertions. Can be used inside +functions, or at the top-level of a file. */ +#define STATIC_ASSERT_JOIN(a,b) a ## b +#define STATIC_ASSERT(cond, msg) \ + typedef int STATIC_ASSERT_JOIN(static_assertion_,msg)[(cond)?1:-1] + /* Valgrind (memcheck) support */ #ifdef SUPPORT_VALGRIND @@ -523,29 +529,29 @@ start/end of string field names are. */ three must not be changed, because whichever is set is actually the number of bytes in a code unit in that mode. 
*/ -#define PCRE2_MODE8 0x00000001 /* compiled in 8 bit mode */ -#define PCRE2_MODE16 0x00000002 /* compiled in 16 bit mode */ -#define PCRE2_MODE32 0x00000004 /* compiled in 32 bit mode */ -#define PCRE2_FIRSTSET 0x00000010 /* first_code unit is set */ -#define PCRE2_FIRSTCASELESS 0x00000020 /* caseless first code unit */ -#define PCRE2_FIRSTMAPSET 0x00000040 /* bitmap of first code units is set */ -#define PCRE2_LASTSET 0x00000080 /* last code unit is set */ -#define PCRE2_LASTCASELESS 0x00000100 /* caseless last code unit */ -#define PCRE2_STARTLINE 0x00000200 /* start after \n for multiline */ -#define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */ -#define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ -#define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */ -#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */ -#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */ -#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ -#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */ -#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */ -#define PCRE2_DEREF_TABLES 0x00040000 /* release character tables */ -#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ -#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ -#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ -#define PCRE2_HASBKC 0x00400000 /* contains \C */ -#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */ +#define PCRE2_MODE8 0x00000001u /* compiled in 8 bit mode */ +#define PCRE2_MODE16 0x00000002u /* compiled in 16 bit mode */ +#define PCRE2_MODE32 0x00000004u /* compiled in 32 bit mode */ +#define PCRE2_FIRSTSET 0x00000010u /* first_code unit is set */ +#define PCRE2_FIRSTCASELESS 0x00000020u /* caseless first code unit */ +#define PCRE2_FIRSTMAPSET 0x00000040u /* bitmap of first code units is set */ +#define PCRE2_LASTSET 0x00000080u /* last code unit is set */ +#define 
PCRE2_LASTCASELESS 0x00000100u /* caseless last code unit */ +#define PCRE2_STARTLINE 0x00000200u /* start after \n for multiline */ +#define PCRE2_JCHANGED 0x00000400u /* j option used in pattern */ +#define PCRE2_HASCRORLF 0x00000800u /* explicit \r or \n in pattern */ +#define PCRE2_HASTHEN 0x00001000u /* pattern contains (*THEN) */ +#define PCRE2_MATCH_EMPTY 0x00002000u /* pattern can match empty string */ +#define PCRE2_BSR_SET 0x00004000u /* BSR was set in the pattern */ +#define PCRE2_NL_SET 0x00008000u /* newline was set in the pattern */ +#define PCRE2_NOTEMPTY_SET 0x00010000u /* (*NOTEMPTY) used ) keep */ +#define PCRE2_NE_ATST_SET 0x00020000u /* (*NOTEMPTY_ATSTART) used) together */ +#define PCRE2_DEREF_TABLES 0x00040000u /* release character tables */ +#define PCRE2_NOJIT 0x00080000u /* (*NOJIT) used */ +#define PCRE2_HASBKPORX 0x00100000u /* contains \P, \p, or \X */ +#define PCRE2_DUPCAPUSED 0x00200000u /* contains (?| */ +#define PCRE2_HASBKC 0x00400000u /* contains \C */ +#define PCRE2_HASACCEPT 0x00800000u /* contains (*ACCEPT) */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) @@ -574,6 +580,16 @@ modes. */ #define REQ_CU_MAX 2000 #endif +/* The maximum nesting depth for Unicode character class sets. +Currently fixed. Warning: the interpreter relies on this so it can encode +the operand stack in a uint32_t. A nesting limit of 15 implies (15*2+1)=31 +stack operands required, due to the fact that we have two (and only two) +levels of operator precedence. In the UTS#18 syntax, you can write 'x&&y[z]' +and in Perl syntax you can write '(?[ x - y & (z) ])', both of which imply +pushing the match results for x & y to the stack. */ + +#define ECLASS_NEST_LIMIT 15 + /* Offsets for the bitmap tables in the cbits set of tables. Each table contains a set of bits for a class map. Some classes are built by combining these tables. */ @@ -609,6 +625,13 @@ total length of the tables. 
*/ #define ctypes_offset (cbits_offset + cbit_length) /* Character types */ #define TABLES_LENGTH (ctypes_offset + 256) +/* Private flags used in compile_context.optimization_flags */ + +#define PCRE2_OPTIM_AUTO_POSSESS 0x00000001u +#define PCRE2_OPTIM_DOTSTAR_ANCHOR 0x00000002u +#define PCRE2_OPTIM_START_OPTIMIZE 0x00000004u + +#define PCRE2_OPTIMIZATION_ALL 0x00000007u /* -------------------- Character and string names ------------------------ */ @@ -915,6 +938,7 @@ a positive value. */ #define STRING_naplb0 "naplb\0" #define STRING_nla0 "nla\0" #define STRING_nlb0 "nlb\0" +#define STRING_scs0 "scs\0" #define STRING_sr0 "sr\0" #define STRING_asr0 "asr\0" #define STRING_positive_lookahead0 "positive_lookahead\0" @@ -925,6 +949,7 @@ a positive value. */ #define STRING_negative_lookbehind0 "negative_lookbehind\0" #define STRING_script_run0 "script_run\0" #define STRING_atomic_script_run "atomic_script_run" +#define STRING_scan_substring0 "scan_substring\0" #define STRING_alpha0 "alpha\0" #define STRING_lower0 "lower\0" @@ -965,6 +990,8 @@ a positive value. */ #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" #define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)" #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" +#define STRING_CASELESS_RESTRICT_RIGHTPAR "CASELESS_RESTRICT)" +#define STRING_TURKISH_CASING_RIGHTPAR "TURKISH_CASING)" #define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP=" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH=" @@ -1216,6 +1243,7 @@ only. */ #define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0" #define STRING_nla0 STR_n STR_l STR_a "\0" #define STRING_nlb0 STR_n STR_l STR_b "\0" +#define STRING_scs0 STR_s STR_c STR_s "\0" #define STRING_sr0 STR_s STR_r "\0" #define STRING_asr0 STR_a STR_s STR_r "\0" #define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" @@ -1226,6 +1254,7 @@ only. 
*/ #define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" #define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0" #define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n +#define STRING_scan_substring0 STR_s STR_c STR_a STR_n STR_UNDERSCORE STR_s STR_u STR_b STR_s STR_t STR_r STR_i STR_n STR_g "\0" #define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" #define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0" @@ -1266,6 +1295,8 @@ only. */ #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS +#define STRING_CASELESS_RESTRICT_RIGHTPAR STR_C STR_A STR_S STR_E STR_L STR_E STR_S STR_S STR_UNDERSCORE STR_R STR_E STR_S STR_T STR_R STR_I STR_C STR_T STR_RIGHT_PARENTHESIS +#define STRING_TURKISH_CASING_RIGHTPAR STR_T STR_U STR_R STR_K STR_I STR_S STR_H STR_UNDERSCORE STR_C STR_A STR_S STR_I STR_N STR_G STR_RIGHT_PARENTHESIS #define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN @@ -1290,21 +1321,22 @@ only. */ changed, the autopossessifying table in pcre2_auto_possess.c must be updated to match. 
*/ -#define PT_ANY 0 /* Any property - matches all chars */ -#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ -#define PT_GC 2 /* Specified general characteristic (e.g. L) */ -#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ -#define PT_SC 4 /* Script only (e.g. Han) */ -#define PT_SCX 5 /* Script extensions (includes SC) */ -#define PT_ALNUM 6 /* Alphanumeric - the union of L and N */ -#define PT_SPACE 7 /* Perl space - general category Z plus 9,10,12,13 */ -#define PT_PXSPACE 8 /* POSIX space - Z plus 9,10,11,12,13 */ -#define PT_WORD 9 /* Word - L, N, Mn, or Pc */ -#define PT_CLIST 10 /* Pseudo-property: match character list */ -#define PT_UCNC 11 /* Universal Character nameable character */ -#define PT_BIDICL 12 /* Specified bidi class */ -#define PT_BOOL 13 /* Boolean property */ -#define PT_TABSIZE 14 /* Size of square table for autopossessify tests */ +#define PT_LAMP 0 /* L& - the union of Lu, Ll, Lt */ +#define PT_GC 1 /* Specified general characteristic (e.g. L) */ +#define PT_PC 2 /* Specified particular characteristic (e.g. Lu) */ +#define PT_SC 3 /* Script only (e.g. Han) */ +#define PT_SCX 4 /* Script extensions (includes SC) */ +#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ +#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */ +#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ +#define PT_WORD 8 /* Word - L, N, Mn, or Pc */ +#define PT_CLIST 9 /* Pseudo-property: match character list */ +#define PT_UCNC 10 /* Universal Character nameable character */ +#define PT_BIDICL 11 /* Specified bidi class */ +#define PT_BOOL 12 /* Boolean property */ +#define PT_ANY 13 /* Must be the last entry! 
+ Any property - matches all chars */ +#define PT_TABSIZE PT_ANY /* Size of square table for autopossessify tests */ /* The following special properties are used only in XCLASS items, when POSIX classes are specified and PCRE2_UCP is set - in other words, for Unicode @@ -1334,6 +1366,94 @@ contain characters with values greater than 255. */ #define XCL_RANGE 2 /* A range (two multibyte chars) follows */ #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ +/* This value represents the beginning of character lists. The value +is 16 bits long, and stored as a high and low byte pair in 8 bit mode. +The lower 12 bits contain information about character lists (see later). */ +#define XCL_LIST (sizeof(PCRE2_UCHAR) == 1 ? 0x10 : 0x1000) + +/* When a character class contains many characters/ranges, +they are stored in character lists. There are four character +lists which contain characters/ranges within a given range. + +The name, character range and item size for each list: +Low16 [0x100 - 0x7fff] 16 bit items +High16 [0x8000 - 0xffff] 16 bit items +Low32 [0x10000 - 0x7fffffff] 32 bit items +High32 [0x80000000 - 0xffffffff] 32 bit items + +The Low32 character list is used only when utf encoding or 32 bit +character width is enabled, and the High32 character list is used only +when 32 bit character width is enabled. + +Each character list contains items. The lowest bit indicates whether +an item is the beginning of a range (bit is cleared), or not (bit +is set). The other bits represent the character shifted left by +one, so its highest bit is discarded. Due to the layout of character +lists, the highest bit of a character is always known: + +Low16 and Low32: the highest bit is always zero +High16 and High32: the highest bit is always one + +The items are ordered in increasing order, so binary search can be +used to find the lower bound of an input character.
The lower bound +is the highest item whose value is less than or equal to the input +character. If the lowest bit of the item is cleared, or the character +stored in the item equals the input character, the input +character is in the character list. */ + +/* Character list constants. */ +#define XCL_CHAR_LIST_LOW_16_START 0x100 +#define XCL_CHAR_LIST_LOW_16_END 0x7fff +#define XCL_CHAR_LIST_LOW_16_ADD 0x0 + +#define XCL_CHAR_LIST_HIGH_16_START 0x8000 +#define XCL_CHAR_LIST_HIGH_16_END 0xffff +#define XCL_CHAR_LIST_HIGH_16_ADD 0x8000 + +#define XCL_CHAR_LIST_LOW_32_START 0x10000 +#define XCL_CHAR_LIST_LOW_32_END 0x7fffffff +#define XCL_CHAR_LIST_LOW_32_ADD 0x0 + +#define XCL_CHAR_LIST_HIGH_32_START 0x80000000 +#define XCL_CHAR_LIST_HIGH_32_END 0xffffffff +#define XCL_CHAR_LIST_HIGH_32_ADD 0x80000000 + +/* Mask for getting the descriptors of character list ranges. +Each descriptor has XCL_TYPE_BIT_LEN bits, and can be processed +by XCL_BEGIN_WITH_RANGE and XCL_ITEM_COUNT_MASK macros. */ +#define XCL_TYPE_MASK 0xfff +#define XCL_TYPE_BIT_LEN 3 +/* If this bit is set, the first item of the character list is the +end of a range, which started before the starting character of the +character list. */ +#define XCL_BEGIN_WITH_RANGE 0x4 +/* Number of items in the character list: 0, 1, or 2. The value 3 +represents that the item count is stored at the beginning of the +character list. The item count has the same width as the items +in the character list (e.g. 16 bit for Low16 and High16 lists). */ +#define XCL_ITEM_COUNT_MASK 0x3 +/* Shift and flag for constructing character list items. The XCL_CHAR_END +is set, when the item is not the beginning of a range. The XCL_CHAR_SHIFT +can be used to encode / decode the character value stored in an item. */ +#define XCL_CHAR_END 0x1 +#define XCL_CHAR_SHIFT 1 + +/* Flag bits for an extended class (OP_ECLASS), which is used for complex +character matches such as [\p{Greek} && \p{Ll}].
*/ + +#define ECL_MAP 0x01 /* Flag: a 32-byte map is present */ + +/* Type tags for the items stored in an extended class (OP_ECLASS). These items +follow the OP_ECLASS's flag char and bitmap, and represent a Reverse Polish +Notation list of operands and operators manipulating a stack of bits. */ + +#define ECL_AND 1 /* Pop two from the stack, AND, and push result. */ +#define ECL_OR 2 /* Pop two from the stack, OR, and push result. */ +#define ECL_XOR 3 /* Pop two from the stack, XOR, and push result. */ +#define ECL_NOT 4 /* Pop one from the stack, NOT, and push result. */ +#define ECL_XCLASS 5 /* XCLASS nested within ECLASS; match and push result. */ +#define ECL_ANY 6 /* Temporary, only used during compilation. */ +#define ECL_NONE 7 /* Temporary, only used during compilation. */ /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 @@ -1555,102 +1675,105 @@ enum { character > 255 is encountered. */ OP_XCLASS, /* 112 Extended class for handling > 255 chars within the class. This does both positive and negative. */ - OP_REF, /* 113 Match a back reference, casefully */ - OP_REFI, /* 114 Match a back reference, caselessly */ - OP_DNREF, /* 115 Match a duplicate name backref, casefully */ - OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */ - OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */ - OP_CALLOUT, /* 118 Call out to external function if provided */ - OP_CALLOUT_STR, /* 119 Call out with string argument */ - - OP_ALT, /* 120 Start of alternation */ - OP_KET, /* 121 End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* 122 These two must remain together and in this */ - OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. */ - OP_KETRPOS, /* 124 Possessive unlimited repeat. */ + OP_ECLASS, /* 113 Really-extended class, for handling logical + expressions computed over characters. 
*/ + OP_REF, /* 114 Match a back reference, casefully */ + OP_REFI, /* 115 Match a back reference, caselessly */ + OP_DNREF, /* 116 Match a duplicate name backref, casefully */ + OP_DNREFI, /* 117 Match a duplicate name backref, caselessly */ + OP_RECURSE, /* 118 Match a numbered subpattern (possibly recursive) */ + OP_CALLOUT, /* 119 Call out to external function if provided */ + OP_CALLOUT_STR, /* 120 Call out with string argument */ + + OP_ALT, /* 121 Start of alternation */ + OP_KET, /* 122 End of group that doesn't have an unbounded repeat */ + OP_KETRMAX, /* 123 These two must remain together and in this */ + OP_KETRMIN, /* 124 order. They are for groups the repeat for ever. */ + OP_KETRPOS, /* 125 Possessive unlimited repeat. */ /* The assertions must come before BRA, CBRA, ONCE, and COND. */ - OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */ - OP_VREVERSE, /* 126 Move pointer back - variable */ - OP_ASSERT, /* 127 Positive lookahead */ - OP_ASSERT_NOT, /* 128 Negative lookahead */ - OP_ASSERTBACK, /* 129 Positive lookbehind */ - OP_ASSERTBACK_NOT, /* 130 Negative lookbehind */ - OP_ASSERT_NA, /* 131 Positive non-atomic lookahead */ - OP_ASSERTBACK_NA, /* 132 Positive non-atomic lookbehind */ + OP_REVERSE, /* 126 Move pointer back - used in lookbehind assertions */ + OP_VREVERSE, /* 127 Move pointer back - variable */ + OP_ASSERT, /* 128 Positive lookahead */ + OP_ASSERT_NOT, /* 129 Negative lookahead */ + OP_ASSERTBACK, /* 130 Positive lookbehind */ + OP_ASSERTBACK_NOT, /* 131 Negative lookbehind */ + OP_ASSERT_NA, /* 132 Positive non-atomic lookahead */ + OP_ASSERTBACK_NA, /* 133 Positive non-atomic lookbehind */ + OP_ASSERT_SCS, /* 134 Scan substring */ /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. The POS versions must immediately follow the non-POS versions in each case. 
*/ - OP_ONCE, /* 133 Atomic group, contains captures */ - OP_SCRIPT_RUN, /* 134 Non-capture, but check characters' scripts */ - OP_BRA, /* 135 Start of non-capturing bracket */ - OP_BRAPOS, /* 136 Ditto, with unlimited, possessive repeat */ - OP_CBRA, /* 137 Start of capturing bracket */ - OP_CBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ - OP_COND, /* 139 Conditional group */ + OP_ONCE, /* 135 Atomic group, contains captures */ + OP_SCRIPT_RUN, /* 136 Non-capture, but check characters' scripts */ + OP_BRA, /* 137 Start of non-capturing bracket */ + OP_BRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ + OP_CBRA, /* 139 Start of capturing bracket */ + OP_CBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */ + OP_COND, /* 141 Conditional group */ /* These five must follow the previous five, in the same order. There's a check for >= SBRA to distinguish the two sets. */ - OP_SBRA, /* 140 Start of non-capturing bracket, check empty */ - OP_SBRAPOS, /* 141 Ditto, with unlimited, possessive repeat */ - OP_SCBRA, /* 142 Start of capturing bracket, check empty */ - OP_SCBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */ - OP_SCOND, /* 144 Conditional group, check empty */ + OP_SBRA, /* 142 Start of non-capturing bracket, check empty */ + OP_SBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */ + OP_SCBRA, /* 144 Start of capturing bracket, check empty */ + OP_SCBRAPOS, /* 145 Ditto, with unlimited, possessive repeat */ + OP_SCOND, /* 146 Conditional group, check empty */ /* The next two pairs must (respectively) be kept together. 
*/ - OP_CREF, /* 145 Used to hold a capture number as condition */ - OP_DNCREF, /* 146 Used to point to duplicate names as a condition */ - OP_RREF, /* 147 Used to hold a recursion number as condition */ - OP_DNRREF, /* 148 Used to point to duplicate names as a condition */ - OP_FALSE, /* 149 Always false (used by DEFINE and VERSION) */ - OP_TRUE, /* 150 Always true (used by VERSION) */ + OP_CREF, /* 147 Used to hold a capture number as condition */ + OP_DNCREF, /* 148 Used to point to duplicate names as a condition */ + OP_RREF, /* 149 Used to hold a recursion number as condition */ + OP_DNRREF, /* 150 Used to point to duplicate names as a condition */ + OP_FALSE, /* 151 Always false (used by DEFINE and VERSION) */ + OP_TRUE, /* 152 Always true (used by VERSION) */ - OP_BRAZERO, /* 151 These two must remain together and in this */ - OP_BRAMINZERO, /* 152 order. */ - OP_BRAPOSZERO, /* 153 */ + OP_BRAZERO, /* 153 These two must remain together and in this */ + OP_BRAMINZERO, /* 154 order. */ + OP_BRAPOSZERO, /* 155 */ /* These are backtracking control verbs */ - OP_MARK, /* 154 always has an argument */ - OP_PRUNE, /* 155 */ - OP_PRUNE_ARG, /* 156 same, but with argument */ - OP_SKIP, /* 157 */ - OP_SKIP_ARG, /* 158 same, but with argument */ - OP_THEN, /* 159 */ - OP_THEN_ARG, /* 160 same, but with argument */ - OP_COMMIT, /* 161 */ - OP_COMMIT_ARG, /* 162 same, but with argument */ + OP_MARK, /* 156 always has an argument */ + OP_PRUNE, /* 157 */ + OP_PRUNE_ARG, /* 158 same, but with argument */ + OP_SKIP, /* 159 */ + OP_SKIP_ARG, /* 160 same, but with argument */ + OP_THEN, /* 161 */ + OP_THEN_ARG, /* 162 same, but with argument */ + OP_COMMIT, /* 163 */ + OP_COMMIT_ARG, /* 164 same, but with argument */ /* These are forced failure and success verbs. FAIL and ACCEPT do accept an argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL) without the need for a special opcode. 
*/ - OP_FAIL, /* 163 */ - OP_ACCEPT, /* 164 */ - OP_ASSERT_ACCEPT, /* 165 Used inside assertions */ - OP_CLOSE, /* 166 Used before OP_ACCEPT to close open captures */ + OP_FAIL, /* 165 */ + OP_ACCEPT, /* 166 */ + OP_ASSERT_ACCEPT, /* 167 Used inside assertions */ + OP_CLOSE, /* 168 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ - OP_SKIPZERO, /* 167 */ + OP_SKIPZERO, /* 169 */ /* This is used to identify a DEFINE group during compilation so that it can be checked for having only one branch. It is changed to OP_FALSE before compilation finishes. */ - OP_DEFINE, /* 168 */ + OP_DEFINE, /* 170 */ /* These opcodes replace their normal counterparts in UCP mode when PCRE2_EXTRA_ASCII_BSW is not set. */ - OP_NOT_UCP_WORD_BOUNDARY, /* 169 */ - OP_UCP_WORD_BOUNDARY, /* 170 */ + OP_NOT_UCP_WORD_BOUNDARY, /* 171 */ + OP_UCP_WORD_BOUNDARY, /* 172 */ /* This is not an opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors - there have been @@ -1693,19 +1816,21 @@ some cases doesn't actually use these names at all). 
*/ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", \ "*+","++", "?+", "{", \ - "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ + "class", "nclass", "xclass", "eclass", \ + "Ref", "Refi", "DnRef", "DnRefi", \ "Recurse", "Callout", "CalloutStr", \ "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ "Reverse", "VReverse", "Assert", "Assert not", \ "Assert back", "Assert back not", \ "Non-atomic assert", "Non-atomic assert back", \ + "Scan substring", \ "Once", \ "Script run", \ "Bra", "BraPos", "CBra", "CBraPos", \ "Cond", \ "SBra", "SBraPos", "SCBra", "SCBraPos", \ "SCond", \ - "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \ + "Capture ref", "Capture dnref", "Cond rec", "Cond dnrec", \ "Cond false", "Cond true", \ "Brazero", "Braminzero", "Braposzero", \ "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ @@ -1766,10 +1891,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+(32/sizeof(PCRE2_UCHAR)), /* CLASS */ \ 1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS */ \ 0, /* XCLASS - variable length */ \ + 0, /* ECLASS - variable length */ \ 1+IMM2_SIZE, /* REF */ \ - 1+IMM2_SIZE, /* REFI */ \ + 1+IMM2_SIZE+1, /* REFI */ \ 1+2*IMM2_SIZE, /* DNREF */ \ - 1+2*IMM2_SIZE, /* DNREFI */ \ + 1+2*IMM2_SIZE+1, /* DNREFI */ \ 1+LINK_SIZE, /* RECURSE */ \ 1+2*LINK_SIZE+1, /* CALLOUT */ \ 0, /* CALLOUT_STR - variable length */ \ @@ -1786,6 +1912,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+LINK_SIZE, /* Assert behind not */ \ 1+LINK_SIZE, /* NA Assert */ \ 1+LINK_SIZE, /* NA Assert behind */ \ + 1+LINK_SIZE, /* Scan substring */ \ 1+LINK_SIZE, /* ONCE */ \ 1+LINK_SIZE, /* SCRIPT_RUN */ \ 1+LINK_SIZE, /* BRA */ \ @@ -1815,6 +1942,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */ #define RREF_ANY 0xffff +/* Constants used by OP_REFI and OP_DNREFI to control matching behaviour. 
*/ + +#define REFI_FLAG_CASELESS_RESTRICT 0x1 +#define REFI_FLAG_TURKISH_CASING 0x2 + /* ---------- Private structures that are mode-independent. ---------- */ @@ -1890,6 +2022,14 @@ typedef struct { #define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch)) #define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch)) #define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch)) +#define UCD_ANY_I(ch) \ + /* match any of the four characters 'i', 'I', U+0130, U+0131 */ \ + (((uint32_t)(ch) | 0x20u) == 0x69u || ((uint32_t)(ch) | 1u) == 0x0131u) +#define UCD_DOTTED_I(ch) \ + ((uint32_t)(ch) == 0x69u || (uint32_t)(ch) == 0x0130u) +#define UCD_FOLD_I_TURKISH(ch) \ + ((uint32_t)(ch) == 0x0130u ? 0x69u : \ + (uint32_t)(ch) == 0x49u ? 0x0131u : (uint32_t)(ch)) /* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words that form a bitmap representing a list of scripts or boolean properties. These @@ -1955,6 +2095,9 @@ extern const uint8_t PRIV(utf8_table4)[]; #define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) #define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_) #define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) +#define _pcre2_ucd_turkish_dotted_i_caseset PCRE2_SUFFIX(_pcre2_ucd_turkish_dotted_i_caseset_) +#define _pcre2_ucd_nocase_ranges PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_) +#define _pcre2_ucd_nocase_ranges_size PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_size_) #define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_) #define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_) #define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_) @@ -1971,14 +2114,17 @@ extern const uint8_t PRIV(utf8_table4)[]; extern const uint8_t PRIV(OP_lengths)[]; extern const uint32_t PRIV(callout_end_delims)[]; extern const uint32_t PRIV(callout_start_delims)[]; -extern const pcre2_compile_context PRIV(default_compile_context); -extern const pcre2_convert_context PRIV(default_convert_context); -extern const 
pcre2_match_context PRIV(default_match_context); +extern pcre2_compile_context PRIV(default_compile_context); +extern pcre2_convert_context PRIV(default_convert_context); +extern pcre2_match_context PRIV(default_match_context); extern const uint8_t PRIV(default_tables)[]; extern const uint32_t PRIV(hspace_list)[]; extern const uint32_t PRIV(vspace_list)[]; extern const uint32_t PRIV(ucd_boolprop_sets)[]; extern const uint32_t PRIV(ucd_caseless_sets)[]; +extern const uint32_t PRIV(ucd_turkish_dotted_i_caseset); +extern const uint32_t PRIV(ucd_nocase_ranges)[]; +extern const uint32_t PRIV(ucd_nocase_ranges_size); extern const uint32_t PRIV(ucd_digit_sets)[]; extern const uint32_t PRIV(ucd_script_sets)[]; extern const ucd_record PRIV(ucd_records)[]; @@ -2039,11 +2185,12 @@ is available. */ #define _pcre2_valid_utf PCRE2_SUFFIX(_pcre2_valid_utf_) #define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_) #define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_) +#define _pcre2_eclass PCRE2_SUFFIX(_pcre2_eclass_) extern int _pcre2_auto_possessify(PCRE2_UCHAR *, const compile_block *); extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *, - int *, uint32_t, uint32_t, BOOL, compile_block *); + int *, uint32_t, uint32_t, uint32_t, BOOL, compile_block *); extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR, BOOL, int *); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); @@ -2066,7 +2213,9 @@ extern int _pcre2_study(pcre2_real_code *); extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *); extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); -extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); +extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, const uint8_t *, BOOL); +extern BOOL _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR, + const uint8_t *, BOOL); /* This function is needed only when memmove() is not available. 
*/ @@ -2079,6 +2228,8 @@ extern void * _pcre2_memmove(void *, const void *, size_t); extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int); +#include "pcre2_util.h" + #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ /* End of pcre2_internal.h */ diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 9bd9e69..6b85813 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -47,7 +47,7 @@ to have access to the hidden structures at all supported widths. Some of the mode-dependent macros are required at different widths for different parts of the pcre2test code (in particular, the included -pcre_printint.c file). We undefine them here so that they can be re-defined for +pcre2_printint.c file). We undefine them here so that they can be re-defined for multiple inclusions. Not all of these are used in pcre2test, but it's easier just to undefine them all. */ @@ -435,7 +435,7 @@ UTF-16 mode. */ c = *eptr; \ if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); -/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the +/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the pointer, incrementing length if there is a low surrogate. This is called when we do not know if we are in UTF-16 mode. */ @@ -556,6 +556,11 @@ code that uses them is simpler because it assumes this. */ /* The real general context structure. At present it holds only data for custom memory control. */ +/* WARNING: if this is ever changed, code in pcre2_substitute.c will have to be +changed because it builds a general context "by hand" in order to avoid the +malloc() call in pcre2_general_context_create(). There is also code in +pcre2_match.c that makes the same assumption. 
*/ + typedef struct pcre2_real_general_context { pcre2_memctl memctl; } pcre2_real_general_context; @@ -574,6 +579,7 @@ typedef struct pcre2_real_compile_context { uint32_t parens_nest_limit; uint32_t extra_options; uint32_t max_varlookbehind; + uint32_t optimization_flags; } pcre2_real_compile_context; /* The real match context structure. */ @@ -584,10 +590,13 @@ typedef struct pcre2_real_match_context { pcre2_jit_callback jit_callback; void *jit_callback_data; #endif - int (*callout)(pcre2_callout_block *, void *); - void *callout_data; - int (*substitute_callout)(pcre2_substitute_callout_block *, void *); - void *substitute_callout_data; + int (*callout)(pcre2_callout_block *, void *); + void *callout_data; + int (*substitute_callout)(pcre2_substitute_callout_block *, void *); + void *substitute_callout_data; + PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *); + void *substitute_case_callout_data; PCRE2_SIZE offset_limit; uint32_t heap_limit; uint32_t match_limit; @@ -623,6 +632,7 @@ typedef struct pcre2_real_code { void *executable_jit; /* Pointer to JIT code */ uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */ CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */ + CODE_BLOCKSIZE_TYPE code_start; /* Byte code start offset */ uint32_t magic_number; /* Paranoid and endianness check */ uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t overall_options; /* Options after processing the pattern */ @@ -641,6 +651,7 @@ typedef struct pcre2_real_code { uint16_t top_backref; /* Highest numbered back reference */ uint16_t name_entry_size; /* Size (code units) of table entries */ uint16_t name_count; /* Number of name entries in the table */ + uint32_t optimization_flags; /* Optimizations enabled at compile time */ } pcre2_real_code; /* The real match data structure. 
Define ovector as large as it can ever @@ -716,6 +727,23 @@ typedef struct named_group { uint16_t isdup; /* TRUE if a duplicate */ } named_group; +/* Structure for caching sorted ranges. This improves the performance +of translating META code to byte code. */ + +typedef struct class_ranges { + struct class_ranges *next; /* Next class ranges */ + size_t char_lists_size; /* Total size of encoded char lists */ + size_t char_lists_start; /* Start offset of encoded char lists */ + uint16_t range_list_size; /* Size of ranges array */ + uint16_t char_lists_types; /* The XCL_LIST header of char lists */ + /* Followed by the list of ranges (start/end pairs) */ +} class_ranges; + +typedef union class_bits_storage { + uint8_t classbits[32]; + uint32_t classwords[8]; +} class_bits_storage; + /* Structure for passing "static" information around between the functions doing the compiling, so that they are thread-safe. */ @@ -725,14 +753,15 @@ typedef struct compile_block { const uint8_t *fcc; /* Points to case-flipping table */ const uint8_t *cbits; /* Points to character type table */ const uint8_t *ctypes; /* Points to table of type maps */ - PCRE2_SPTR start_workspace; /* The start of working space */ - PCRE2_SPTR start_code; /* The start of the compiled code */ + PCRE2_UCHAR *start_workspace; /* The start of working space */ + PCRE2_UCHAR *start_code; /* The start of the compiled code */ PCRE2_SPTR start_pattern; /* The start of the pattern */ PCRE2_SPTR end_pattern; /* The end of the pattern */ PCRE2_UCHAR *name_table; /* The name/number table */ PCRE2_SIZE workspace_size; /* Size of workspace */ PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */ PCRE2_SIZE erroroffset; /* Offset of error in pattern */ + class_bits_storage classbits; /* Temporary store for classbits */ uint16_t names_found; /* Number of entries so far */ uint16_t name_entry_size; /* Size of each entry */ uint16_t parens_depth; /* Depth of nested parentheses */ @@ -750,9 +779,9 @@ typedef struct 
compile_block { uint32_t backref_map; /* Bitmap of low back refs */ uint32_t nltype; /* Newline type */ uint32_t nllen; /* Newline string length */ - uint32_t class_range_start; /* Overall class range start */ - uint32_t class_range_end; /* Overall class range end */ PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ + uint8_t class_op_used[ECLASS_NEST_LIMIT]; /* Operation used for + extended classes */ uint32_t req_varyopt; /* "After variable item" flag for reqbyte */ uint32_t max_varlookbehind; /* Limit for variable lookbehinds */ int max_lookbehind; /* Maximum lookbehind encountered (characters) */ @@ -760,6 +789,11 @@ typedef struct compile_block { BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL had_recurse; /* Had a pattern recursion or subroutine call */ BOOL dupnames; /* Duplicate names exist */ +#ifdef SUPPORT_WIDE_CHARS + class_ranges *cranges; /* First class range. */ + class_ranges *next_cranges; /* Next class range. */ + size_t char_lists_size; /* Current size of character lists */ +#endif } compile_block; /* Structure for keeping the properties of the in-memory stack used @@ -793,7 +827,7 @@ typedef struct heapframe { to RRMATCH(), but which do not need to be copied to new frames. */ PCRE2_SPTR ecode; /* The current position in the pattern */ - PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */ + PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE2_SPTR values */ PCRE2_SIZE length; /* Used for character, string, or code lengths */ PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */ @@ -841,11 +875,10 @@ typedef struct heapframe { PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ } heapframe; -/* This typedef is a check that the size of the heapframe structure is a -multiple of PCRE2_SIZE. See various comments above. */ +/* Assert that the size of the heapframe structure is a multiple of PCRE2_SIZE. 
+See various comments above. */ -typedef char check_heapframe_size[ - ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; +STATIC_ASSERT((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0, heapframe_size); /* Structure for computing the alignment of heapframe. */ diff --git a/src/pcre2_jit_char_inc.h b/src/pcre2_jit_char_inc.h new file mode 100644 index 0000000..69fe938 --- /dev/null +++ b/src/pcre2_jit_char_inc.h @@ -0,0 +1,2280 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + This module by Zoltan Herczeg + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* XClass matching code. */ + +#ifdef SUPPORT_WIDE_CHARS + +#define ECLASS_CHAR_DATA STACK_TOP +#define ECLASS_STACK_DATA STACK_LIMIT + +#define SET_CHAR_OFFSET(value) \ + if ((value) != charoffset) \ + { \ + if ((value) < charoffset) \ + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ + else \ + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ + } \ + charoffset = (value); + +#define READ_FROM_CHAR_LIST(destination) \ + if (list_ind <= 1) \ + { \ + destination = *(const uint16_t*)next_char; \ + next_char += 2; \ + } \ + else \ + { \ + destination = *(const uint32_t*)next_char; \ + next_char += 4; \ + } + +#define XCLASS_LOCAL_RANGES_SIZE 32 +#define XCLASS_LOCAL_RANGES_LOG2_SIZE 5 + +typedef struct xclass_stack_item { + sljit_u32 first_item; + sljit_u32 last_item; + struct sljit_jump *jump; +} xclass_stack_item; + +typedef struct xclass_ranges { + size_t range_count; + /* Pointer to ranges. A stack area is provided when a small buffer is enough. */ + uint32_t *ranges; + uint32_t local_ranges[XCLASS_LOCAL_RANGES_SIZE * 2]; + /* Stack size must be log2(ranges / 2). 
*/ + xclass_stack_item *stack; + xclass_stack_item local_stack[XCLASS_LOCAL_RANGES_LOG2_SIZE]; +} xclass_ranges; + +static void xclass_compute_ranges(compiler_common *common, PCRE2_SPTR cc, xclass_ranges *ranges) +{ +DEFINE_COMPILER; +size_t range_count = 0, est_range_count; +size_t est_stack_size, tmp; +uint32_t type, list_ind; +uint32_t est_type; +uint32_t char_list_add, range_start, range_end; +const uint8_t *next_char; +const uint8_t *est_next_char; +#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) +BOOL utf = common->utf; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + +if (*cc == XCL_SINGLE || *cc == XCL_RANGE) + { + /* Only a few ranges are present. */ + do + { + type = *cc++; + SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE); + GETCHARINCTEST(range_end, cc); + ranges->ranges[range_count] = range_end; + + if (type == XCL_RANGE) + { + GETCHARINCTEST(range_end, cc); + } + + ranges->ranges[range_count + 1] = range_end; + range_count += 2; + } + while (*cc != XCL_END); + + SLJIT_ASSERT(range_count <= XCLASS_LOCAL_RANGES_SIZE); + ranges->range_count = range_count; + return; + } + +SLJIT_ASSERT(cc[0] >= XCL_LIST); +#if PCRE2_CODE_UNIT_WIDTH == 8 +type = (uint32_t)(cc[0] << 8) | cc[1]; +cc += 2; +#else +type = cc[0]; +cc++; +#endif /* CODE_UNIT_WIDTH */ + +/* Align characters. */ +next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1); +type &= XCL_TYPE_MASK; + +/* Estimate size. */ +est_next_char = next_char; +est_type = type; +est_range_count = 0; +list_ind = 0; + +while (est_type > 0) + { + uint32_t item_count = est_type & XCL_ITEM_COUNT_MASK; + + if (item_count == XCL_ITEM_COUNT_MASK) + { + if (list_ind <= 1) + { + item_count = *(const uint16_t*)est_next_char; + est_next_char += 2; + } + else + { + item_count = *(const uint32_t*)est_next_char; + est_next_char += 4; + } + } + + est_type >>= XCL_TYPE_BIT_LEN; + est_next_char += (size_t)item_count << (list_ind <= 1 ? 
1 : 2); + list_ind++; + est_range_count += item_count + 1; + } + +if (est_range_count > XCLASS_LOCAL_RANGES_SIZE) + { + est_stack_size = 0; + tmp = est_range_count - 1; + + /* Compute log2(est_range_count) */ + while (tmp > 0) + { + est_stack_size++; + tmp >>= 1; + } + + ranges->stack = (xclass_stack_item*)SLJIT_MALLOC((sizeof(xclass_stack_item) * est_stack_size) + + ((sizeof(uint32_t) << 1) * (size_t)est_range_count), compiler->allocator_data); + + if (ranges->stack == NULL) + { + sljit_set_compiler_memory_error(compiler); + ranges->ranges = NULL; + return; + } + + ranges->ranges = (uint32_t*)(ranges->stack + est_stack_size); + } + +char_list_add = XCL_CHAR_LIST_LOW_16_ADD; +range_start = ~(uint32_t)0; +list_ind = 0; + +if ((type & XCL_BEGIN_WITH_RANGE) != 0) + range_start = XCL_CHAR_LIST_LOW_16_START; + +while (type > 0) + { + uint32_t item_count = type & XCL_ITEM_COUNT_MASK; + + if (item_count == XCL_ITEM_COUNT_MASK) + { + READ_FROM_CHAR_LIST(item_count); + SLJIT_ASSERT(item_count >= XCL_ITEM_COUNT_MASK); + } + + while (item_count > 0) + { + READ_FROM_CHAR_LIST(range_end); + + if ((range_end & XCL_CHAR_END) != 0) + { + range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + if (range_start == ~(uint32_t)0) + range_start = range_end; + + ranges->ranges[range_count] = range_start; + ranges->ranges[range_count + 1] = range_end; + range_count += 2; + range_start = ~(uint32_t)0; + } + else + range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + item_count--; + } + + list_ind++; + type >>= XCL_TYPE_BIT_LEN; + + if (range_start == ~(uint32_t)0) + { + if ((type & XCL_BEGIN_WITH_RANGE) != 0) + { + if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START; +#if PCRE2_CODE_UNIT_WIDTH == 32 + else if (list_ind == 2) range_start = XCL_CHAR_LIST_LOW_32_START; + else range_start = XCL_CHAR_LIST_HIGH_32_START; +#else + else range_start = XCL_CHAR_LIST_LOW_32_START; +#endif + } + } + else if ((type & XCL_BEGIN_WITH_RANGE) == 0) + { + if (list_ind == 1) 
range_end = XCL_CHAR_LIST_LOW_16_END; + else if (list_ind == 2) range_end = XCL_CHAR_LIST_HIGH_16_END; +#if PCRE2_CODE_UNIT_WIDTH == 32 + else if (list_ind == 3) range_end = XCL_CHAR_LIST_LOW_32_END; + else range_end = XCL_CHAR_LIST_HIGH_32_END; +#else + else range_end = XCL_CHAR_LIST_LOW_32_END; +#endif + + ranges->ranges[range_count] = range_start; + ranges->ranges[range_count + 1] = range_end; + range_count += 2; + range_start = ~(uint32_t)0; + } + + if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD; +#if PCRE2_CODE_UNIT_WIDTH == 32 + else if (list_ind == 2) char_list_add = XCL_CHAR_LIST_LOW_32_ADD; + else char_list_add = XCL_CHAR_LIST_HIGH_32_ADD; +#else + else char_list_add = XCL_CHAR_LIST_LOW_32_ADD; +#endif + } + +SLJIT_ASSERT(range_count > 0 && range_count <= (est_range_count << 1)); +SLJIT_ASSERT(next_char <= (const uint8_t*)common->start); +ranges->range_count = range_count; +} + +static void xclass_check_bitset(compiler_common *common, const sljit_u8 *bitset, jump_list **found, jump_list **backtracks) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +if (!optimize_class(common, bitset, (bitset[31] & 0x80) != 0, TRUE, found)) + { + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)bitset); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); + add_jump(compiler, found, JUMP(SLJIT_NOT_ZERO)); + } + +add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); +JUMPHERE(jump); +} + +#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + +static void xclass_update_min_max(compiler_common *common, PCRE2_SPTR cc, sljit_u32 *min_ptr, sljit_u32 *max_ptr) +{ +uint32_t type, list_ind, c; +sljit_u32 min = *min_ptr; +sljit_u32 max = *max_ptr; +uint32_t char_list_add; +const uint8_t *next_char; +BOOL utf = TRUE; + +/* This 
function is pointless without utf 8/16. */ +SLJIT_ASSERT(common->utf); +if (*cc == XCL_SINGLE || *cc == XCL_RANGE) + { + /* Only a few ranges are present. */ + do + { + type = *cc++; + SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE); + GETCHARINCTEST(c, cc); + + if (c < min) + min = c; + + if (type == XCL_RANGE) + { + GETCHARINCTEST(c, cc); + } + + if (c > max) + max = c; + } + while (*cc != XCL_END); + + SLJIT_ASSERT(min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max); + *min_ptr = min; + *max_ptr = max; + return; + } + +SLJIT_ASSERT(cc[0] >= XCL_LIST); +#if PCRE2_CODE_UNIT_WIDTH == 8 +type = (uint32_t)(cc[0] << 8) | cc[1]; +cc += 2; +#else +type = cc[0]; +cc++; +#endif /* CODE_UNIT_WIDTH */ + +/* Align characters. */ +next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1); +type &= XCL_TYPE_MASK; + +SLJIT_ASSERT(type != 0); + +/* Detect minimum. */ + +/* Skip unused ranges. */ +list_ind = 0; +while ((type & (XCL_BEGIN_WITH_RANGE | XCL_ITEM_COUNT_MASK)) == 0) + { + type >>= XCL_TYPE_BIT_LEN; + list_ind++; + } + +SLJIT_ASSERT(list_ind <= 2); +switch (list_ind) + { + case 0: + char_list_add = XCL_CHAR_LIST_LOW_16_ADD; + c = XCL_CHAR_LIST_LOW_16_START; + break; + + case 1: + char_list_add = XCL_CHAR_LIST_HIGH_16_ADD; + c = XCL_CHAR_LIST_HIGH_16_START; + break; + + default: + char_list_add = XCL_CHAR_LIST_LOW_32_ADD; + c = XCL_CHAR_LIST_LOW_32_START; + break; + } + +if ((type & XCL_BEGIN_WITH_RANGE) != 0) + { + if (c < min) + min = c; + } +else + { + if ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK) + { + if (list_ind <= 1) + c = *(const uint16_t*)(next_char + 2); + else + c = *(const uint32_t*)(next_char + 4); + } + else + { + if (list_ind <= 1) + c = *(const uint16_t*)next_char; + else + c = *(const uint32_t*)next_char; + } + + c = char_list_add + (c >> XCL_CHAR_SHIFT); + if (c < min) + min = c; + } + +/* Detect maximum. */ + +/* Skip intermediate ranges. 
*/ +while (TRUE) + { + if ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK) + { + if (list_ind <= 1) + { + c = *(const uint16_t*)next_char; + next_char += (c + 1) << 1; + } + else + { + c = *(const uint32_t*)next_char; + next_char += (c + 1) << 2; + } + } + else + next_char += (type & XCL_ITEM_COUNT_MASK) << (list_ind <= 1 ? 1 : 2); + + if ((type >> XCL_TYPE_BIT_LEN) == 0) + break; + + list_ind++; + type >>= XCL_TYPE_BIT_LEN; + } + +SLJIT_ASSERT(list_ind <= 2 && type != 0); +switch (list_ind) + { + case 0: + char_list_add = XCL_CHAR_LIST_LOW_16_ADD; + c = XCL_CHAR_LIST_LOW_16_END; + break; + + case 1: + char_list_add = XCL_CHAR_LIST_HIGH_16_ADD; + c = XCL_CHAR_LIST_HIGH_16_END; + break; + + default: + char_list_add = XCL_CHAR_LIST_LOW_32_ADD; + c = XCL_CHAR_LIST_LOW_32_END; + break; + } + +if ((type & XCL_ITEM_COUNT_MASK) != 0) + { + /* Type is reused as temporary. */ + if (list_ind <= 1) + type = *(const uint16_t*)(next_char - 2); + else + type = *(const uint32_t*)(next_char - 4); + + if (type & XCL_CHAR_END) + c = char_list_add + (type >> XCL_CHAR_SHIFT); + } + +if (c > max) + max = c; + +SLJIT_ASSERT(min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max); +*min_ptr = min; +*max_ptr = max; +} + +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + +#define XCLASS_IS_ECLASS 0x001 +#ifdef SUPPORT_UNICODE +#define XCLASS_SAVE_CHAR 0x002 +#define XCLASS_HAS_TYPE 0x004 +#define XCLASS_HAS_SCRIPT 0x008 +#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 +#define XCLASS_HAS_BOOL 0x020 +#define XCLASS_HAS_BIDICL 0x040 +#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) +#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0 0x200 +#endif /* SUPPORT_UNICODE */ + +static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list 
**backtracks, BOOL check_str_ptr); + +/* TMP3 must be preserved because it is used by compile_iterator_matchingpath. */ +static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, sljit_u32 status) +{ +DEFINE_COMPILER; +jump_list *found = NULL; +jump_list *check_result = NULL; +jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; +sljit_uw c, charoffset; +sljit_u32 max = READ_CHAR_MAX, min = 0; +struct sljit_jump *jump = NULL; +PCRE2_UCHAR flags; +PCRE2_SPTR ccbegin; +sljit_u32 compares, invertcmp, depth; +sljit_u32 first_item, last_item, mid_item; +sljit_u32 range_start, range_end; +xclass_ranges ranges; +BOOL has_cmov, last_range_set; + +#ifdef SUPPORT_UNICODE +sljit_u32 category_list = 0; +sljit_u32 items; +int typereg = TMP1; +#endif /* SUPPORT_UNICODE */ + +SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw)); +/* Scanning the necessary info. */ +flags = *cc++; +ccbegin = cc; +compares = 0; + +if (flags & XCL_MAP) + cc += 32 / sizeof(PCRE2_UCHAR); + +#ifdef SUPPORT_UNICODE +while (*cc == XCL_PROP || *cc == XCL_NOTPROP) + { + compares++; + cc++; + + items = 0; + + switch(*cc) + { + case PT_LAMP: + items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); + break; + + case PT_GC: + items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]); + break; + + case PT_PC: + items = UCPCAT(cc[1]); + break; + + case PT_WORD: + items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N; + break; + + case PT_ALNUM: + items = UCPCAT_L | UCPCAT_N; + break; + + case PT_SCX: + status |= XCLASS_HAS_SCRIPT_EXTENSION; + if (cc[-1] == XCL_NOTPROP) + { + status |= XCLASS_SCRIPT_EXTENSION_NOTPROP; + break; + } + compares++; + /* Fall through */ + + case PT_SC: + status |= XCLASS_HAS_SCRIPT; + break; + + case PT_SPACE: + case PT_PXSPACE: + case PT_PXGRAPH: + case PT_PXPRINT: + case PT_PXPUNCT: + status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE; + break; + + case PT_UCNC: + case PT_PXXDIGIT: + status |= 
XCLASS_SAVE_CHAR; + break; + + case PT_BOOL: + status |= XCLASS_HAS_BOOL; + break; + + case PT_BIDICL: + status |= XCLASS_HAS_BIDICL; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + + if (items > 0) + { + if (cc[-1] == XCL_NOTPROP) + items ^= UCPCAT_ALL; + category_list |= items; + status |= XCLASS_HAS_TYPE; + compares--; + } + + cc += 2; + } + +if (category_list == UCPCAT_ALL) + { + /* All or no characters are accepted, same as dotall. */ + if (status & XCLASS_IS_ECLASS) + { + if (list != backtracks) + OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + return; + } + + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list == backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } + +if (category_list != 0) + compares++; +#endif + +if (*cc != XCL_END) + { +#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + if (common->utf && compares == 0 && !(status & XCLASS_IS_ECLASS)) + { + SLJIT_ASSERT(category_list == 0); + max = 0; + min = (flags & XCL_MAP) != 0 ? 0 : READ_CHAR_MAX; + xclass_update_min_max(common, cc, &min, &max); + } +#endif + compares++; +#ifdef SUPPORT_UNICODE + status |= XCLASS_SAVE_CHAR; +#endif /* SUPPORT_UNICODE */ + } + +#ifdef SUPPORT_UNICODE +SLJIT_ASSERT(compares > 0 || category_list != 0); +#else /* !SUPPORT_UNICODE */ +SLJIT_ASSERT(compares > 0); +#endif /* SUPPORT_UNICODE */ + +/* We are not necessary in utf mode even in 8 bit mode. */ +cc = ccbegin; +if (!(status & XCLASS_IS_ECLASS)) + { + if ((flags & XCL_NOT) != 0) + read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + { +#ifdef SUPPORT_UNICODE + read_char(common, min, max, (status & XCLASS_NEEDS_UCD) ? 
backtracks : NULL, 0); +#else /* !SUPPORT_UNICODE */ + read_char(common, min, max, NULL, 0); +#endif /* SUPPORT_UNICODE */ + } + } + +if ((flags & XCL_MAP) != 0) + { + SLJIT_ASSERT(!(status & XCLASS_IS_ECLASS)); + xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks); + cc += 32 / sizeof(PCRE2_UCHAR); + } + +#ifdef SUPPORT_UNICODE +if (status & XCLASS_NEEDS_UCD) + { + if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR) + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); + +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (!common->utf) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, UNASSIGNED_UTF_CHAR, TMP1); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + + OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + + ccbegin = cc; + + if (status & XCLASS_HAS_BIDICL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT); + + while (*cc == XCL_PROP || *cc == XCL_NOTPROP) + { + cc++; + + if (*cc == PT_BIDICL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); + add_jump(compiler, compares > 0 ? 
list : backtracks, jump); + } + cc += 2; + } + + cc = ccbegin; + } + + if (status & XCLASS_HAS_BOOL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + while (*cc == XCL_PROP || *cc == XCL_NOTPROP) + { + cc++; + if (*cc == PT_BOOL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + cc += 2; + } + + cc = ccbegin; + } + + if (status & XCLASS_HAS_SCRIPT) + { + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + + while (*cc == XCL_PROP || *cc == XCL_NOTPROP) + { + cc++; + + switch (*cc) + { + case PT_SCX: + if (cc[-1] == XCL_NOTPROP) + break; + /* Fall through */ + + case PT_SC: + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + + add_jump(compiler, compares > 0 ? 
list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1])); + } + cc += 2; + } + + cc = ccbegin; + } + + if (status & XCLASS_HAS_SCRIPT_EXTENSION) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + if (status & XCLASS_SCRIPT_EXTENSION_NOTPROP) + { + if (status & XCLASS_HAS_TYPE) + { + if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP2, 0); + status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0; + } + else + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); + status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR; + } + } + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + } + + while (*cc == XCL_PROP || *cc == XCL_NOTPROP) + { + cc++; + + if (*cc == PT_SCX) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + + jump = NULL; + if (cc[-1] == XCL_NOTPROP) + { + jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]); + if (invertcmp) + { + add_jump(compiler, backtracks, jump); + jump = NULL; + } + invertcmp ^= 0x1; + } + + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + + if (jump != NULL) + JUMPHERE(jump); + } + cc += 2; + } + + if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); + else if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR) + OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); + cc = ccbegin; + } + + if (status & XCLASS_SAVE_CHAR) + OP1(SLJIT_MOV, TMP1, 0, (status & XCLASS_IS_ECLASS) ? 
ECLASS_CHAR_DATA : RETURN_ADDR, 0); + + if (status & XCLASS_HAS_TYPE) + { + if (status & XCLASS_SAVE_CHAR) + typereg = RETURN_ADDR; + + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0); + + if (category_list > 0) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + } + } +#endif /* SUPPORT_UNICODE */ + +/* Generating code. */ +charoffset = 0; + +#ifdef SUPPORT_UNICODE +while (*cc == XCL_PROP || *cc == XCL_NOTPROP) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + jump = NULL; + + if (*cc == XCL_NOTPROP) + invertcmp ^= 0x1; + cc++; + switch(*cc) + { + case PT_LAMP: + case PT_GC: + case PT_PC: + case PT_SC: + case PT_SCX: + case PT_BOOL: + case PT_BIDICL: + case PT_WORD: + case PT_ALNUM: + compares++; + /* Already handled. 
*/ + break; + + case PT_SPACE: + case PT_PXSPACE: + SET_CHAR_OFFSET(9); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_UCNC: + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + SET_CHAR_OFFSET(0xa0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + SET_CHAR_OFFSET(0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXGRAPH: + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + + c = charoffset; + /* In case of ucp_Cf, we overwrite the result. 
*/ + SET_CHAR_OFFSET(0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + /* Restore charoffset. */ + SET_CHAR_OFFSET(c); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPRINT: + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + + c = charoffset; + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + /* Restore charoffset. 
*/ + SET_CHAR_OFFSET(c); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPUNCT: + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); + + SET_CHAR_OFFSET(0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f); + OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXXDIGIT: + SET_CHAR_OFFSET(CHAR_A); + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(CHAR_0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff21); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff41); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + + JUMPHERE(jump); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + + cc += 2; + + if (jump != NULL) + add_jump(compiler, compares > 0 ? 
list : backtracks, jump); + } + +if (compares == 0) + { + if (found != NULL) + set_jumps(found, LABEL()); + + if (status & XCLASS_IS_ECLASS) + OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + return; + } +#endif /* SUPPORT_UNICODE */ + +SLJIT_ASSERT(compares == 1); +ranges.range_count = 0; +ranges.ranges = ranges.local_ranges; +ranges.stack = ranges.local_stack; + +xclass_compute_ranges(common, cc, &ranges); + +/* Memory error is set for the compiler. */ +if (ranges.stack == NULL) + return; + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) && \ + defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) +if (common->utf) + { + min = READ_CHAR_MAX; + max = 0; + xclass_update_min_max(common, cc, &min, &max); + SLJIT_ASSERT(ranges.ranges[0] == min && ranges.ranges[ranges.range_count - 1] == max); + } +#endif /* SLJIT_DEBUG && SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + +invertcmp = (list != backtracks); + +if (ranges.range_count == 2) + { + range_start = ranges.ranges[0]; + range_end = ranges.ranges[1]; + + if (range_start < range_end) + { + SET_CHAR_OFFSET(range_start); + jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)); + } + else + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset)); + + add_jump(compiler, backtracks, jump); + + SLJIT_ASSERT(ranges.stack == ranges.local_stack); + if (found != NULL) + set_jumps(found, LABEL()); + + if (status & XCLASS_IS_ECLASS) + OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + return; + } + +range_start = ranges.ranges[0]; +SET_CHAR_OFFSET(range_start); +if (ranges.range_count >= 6) + { + /* Early fail. */ + range_end = ranges.ranges[ranges.range_count - 1]; + add_jump(compiler, (flags & XCL_NOT) == 0 ? 
backtracks : &found, + CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start))); + } + +depth = 0; +first_item = 0; +last_item = ranges.range_count - 2; +has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0; + +while (TRUE) + { + /* At least two items are present. */ + SLJIT_ASSERT(first_item < last_item && charoffset == ranges.ranges[0]); + last_range_set = FALSE; + + if (first_item + 6 <= last_item) + { + mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1; + SLJIT_ASSERT(last_item >= mid_item + 4); + + range_end = ranges.ranges[mid_item + 1]; + if (first_item + 6 > mid_item && ranges.ranges[mid_item] == range_end) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset)); + ranges.stack[depth].jump = JUMP(SLJIT_GREATER); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + last_range_set = TRUE; + } + else + ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset)); + + ranges.stack[depth].first_item = (sljit_u32)(mid_item + 2); + ranges.stack[depth].last_item = (sljit_u32)last_item; + + depth++; + SLJIT_ASSERT(ranges.stack == ranges.local_stack ? 
+ depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges); + + last_item = mid_item; + if (!last_range_set) + continue; + + last_item -= 2; + } + + if (!last_range_set) + { + range_start = ranges.ranges[first_item]; + range_end = ranges.ranges[first_item + 1]; + + if (range_start < range_end) + { + SET_CHAR_OFFSET(range_start); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + } + else + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + } + first_item += 2; + } + + SLJIT_ASSERT(first_item <= last_item); + + do + { + range_start = ranges.ranges[first_item]; + range_end = ranges.ranges[first_item + 1]; + + if (range_start < range_end) + { + SET_CHAR_OFFSET(range_start); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)); + + if (has_cmov) + SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2); + else + OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL); + } + else + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset)); + + if (has_cmov) + SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2); + else + OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); + } + + first_item += 2; + } + while (first_item <= last_item); + + if (depth == 0) break; + + add_jump(compiler, &check_result, JUMP(SLJIT_JUMP)); + + /* The charoffset resets after the end of a branch is reached. 
*/ + charoffset = ranges.ranges[0]; + depth--; + first_item = ranges.stack[depth].first_item; + last_item = ranges.stack[depth].last_item; + JUMPHERE(ranges.stack[depth].jump); + } + +if (check_result != NULL) + set_jumps(check_result, LABEL()); + +if (has_cmov) + jump = CMP(SLJIT_NOT_EQUAL ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); +else + { + sljit_set_current_flags(compiler, SLJIT_SET_Z); + jump = JUMP(SLJIT_NOT_EQUAL ^ invertcmp); + } + +add_jump(compiler, backtracks, jump); + +if (found != NULL) + set_jumps(found, LABEL()); + +if (status & XCLASS_IS_ECLASS) + OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + +if (ranges.stack != ranges.local_stack) + SLJIT_FREE(ranges.stack, compiler->allocator_data); +} + +static PCRE2_SPTR compile_eclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) +{ +DEFINE_COMPILER; +PCRE2_SPTR end = cc + GET(cc, 0) - 1; +PCRE2_SPTR begin; +jump_list *not_found; +jump_list *found = NULL; + +cc += LINK_SIZE; + +/* Should be optimized later. */ +read_char(common, 0, READ_CHAR_MAX, backtracks, 0); + +if (((*cc++) & ECL_MAP) != 0) + { + xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks); + cc += 32 / sizeof(PCRE2_UCHAR); + } + +begin = cc; + +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, ECLASS_CHAR_DATA, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, ECLASS_STACK_DATA, 0); +OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0); +OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, TMP1, 0); + +/* All eclass must start with an xclass. 
*/ +SLJIT_ASSERT(*cc == ECL_XCLASS); + +while (cc < end) + { + switch (*cc) + { + case ECL_AND: + ++cc; + OP2(SLJIT_OR, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, ~(sljit_sw)1); + OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + OP2(SLJIT_AND, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0); + break; + + case ECL_OR: + ++cc; + OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0); + break; + + case ECL_XOR: + ++cc; + OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0); + break; + + case ECL_NOT: + ++cc; + OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + break; + + default: + SLJIT_ASSERT(*cc == ECL_XCLASS); + if (cc != begin) + { + OP1(SLJIT_MOV, TMP1, 0, ECLASS_CHAR_DATA, 0); + OP2(SLJIT_SHL, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1); + } + + not_found = NULL; + compile_xclass_matchingpath(common, cc + 1 + LINK_SIZE, ¬_found, XCLASS_IS_ECLASS); + set_jumps(not_found, LABEL()); + + cc += GET(cc, 1); + break; + } + } + +OP2U(SLJIT_SUB | SLJIT_SET_Z, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0); +OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); +OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); +add_jump(compiler, backtracks, JUMP(SLJIT_EQUAL)); +set_jumps(found, LABEL()); +return end; +} + +/* Generic character matching code. 
*/ + +#undef SET_CHAR_OFFSET +#undef READ_FROM_CHAR_LIST +#undef XCLASS_LOCAL_RANGES_SIZE +#undef XCLASS_LOCAL_RANGES_LOG2_SIZE + +#endif /* SUPPORT_WIDE_CHARS */ + +static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc, + compare_context *context, jump_list **backtracks) +{ +DEFINE_COMPILER; +unsigned int othercasebit = 0; +PCRE2_SPTR othercasechar = NULL; +#ifdef SUPPORT_UNICODE +int utflength; +#endif + +if (caseless && char_has_othercase(common, cc)) + { + othercasebit = char_get_othercase_bit(common, cc); + SLJIT_ASSERT(othercasebit); + /* Extracting bit difference info. */ +#if PCRE2_CODE_UNIT_WIDTH == 8 + othercasechar = cc + (othercasebit >> 8); + othercasebit &= 0xff; +#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + /* Note that this code only handles characters in the BMP. If there + ever are characters outside the BMP whose othercase differs in only one + bit from itself (there currently are none), this code will need to be + revised for PCRE2_CODE_UNIT_WIDTH == 32. 
*/ + othercasechar = cc + (othercasebit >> 9); + if ((othercasebit & 0x100) != 0) + othercasebit = (othercasebit & 0xff) << 8; + else + othercasebit &= 0xff; +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ + } + +if (context->sourcereg == -1) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else if (context->length >= 2) + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#elif PCRE2_CODE_UNIT_WIDTH == 32 + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ + context->sourcereg = TMP2; + } + +#ifdef SUPPORT_UNICODE +utflength = 1; +if (common->utf && HAS_EXTRALEN(*cc)) + utflength += GET_EXTRALEN(*cc); + +do + { +#endif + + context->length -= IN_UCHARS(1); +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + + /* Unaligned read is supported. 
*/ + if (othercasebit != 0 && othercasechar == cc) + { + context->c.asuchars[context->ucharptr] = *cc | othercasebit; + context->oc.asuchars[context->ucharptr] = othercasebit; + } + else + { + context->c.asuchars[context->ucharptr] = *cc; + context->oc.asuchars[context->ucharptr] = 0; + } + context->ucharptr++; + +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) +#else + if (context->ucharptr >= 2 || context->length == 0) +#endif + { + if (context->length >= 4) + OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + else if (context->length >= 2) + OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#if PCRE2_CODE_UNIT_WIDTH == 8 + else if (context->length >= 1) + OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; + + switch(context->ucharptr) + { + case 4 / sizeof(PCRE2_UCHAR): + if (context->oc.asint != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); + break; + + case 2 / sizeof(PCRE2_UCHAR): + if (context->oc.asushort != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); + break; + +#if PCRE2_CODE_UNIT_WIDTH == 8 + case 1: + if (context->oc.asbyte != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); + break; +#endif + + default: + SLJIT_UNREACHABLE(); + 
break; + } + context->ucharptr = 0; + } + +#else + + /* Unaligned read is unsupported or in 32 bit mode. */ + if (context->length >= 1) + OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; + + if (othercasebit != 0 && othercasechar == cc) + { + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); + +#endif + + cc++; +#ifdef SUPPORT_UNICODE + utflength--; + } +while (utflength > 0); +#endif + +return cc; +} + +#ifdef SUPPORT_UNICODE + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function, and those below it, must be kept in step (note by PH, June 2024). */ + +static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR prevcc, endcc, bptr; +BOOL first = TRUE; +BOOL was_ep_ZWJ = FALSE; +uint32_t c; + +prevcc = cc; +endcc = NULL; +do + { + GETCHARINC(c, cc); + rgb = UCD_GRAPHBREAK(c); + + if (first) + { + lgb = rgb; + endcc = cc; + first = FALSE; + continue; + } + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. 
*/ + + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) + { + ricount = 0; + bptr = prevcc; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + bptr--; + BACKCHAR(bptr); + GETCHAR(c, bptr); + + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) + break; + + ricount++; + } + + if ((ricount & 1) != 0) break; /* Grapheme break required */ + } + + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + prevcc = endcc; + endcc = cc; + } +while (cc < end_subject); + +return endcc; +} + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function, and the one below it, must be kept in step (note by PH, June 2024). */ + +static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR prevcc, endcc, bptr; +BOOL first = TRUE; +BOOL was_ep_ZWJ = FALSE; +uint32_t c; + +prevcc = cc; +endcc = NULL; +do + { + GETCHARINC_INVALID(c, cc, end_subject, break); + rgb = UCD_GRAPHBREAK(c); + + if (first) + { + lgb = rgb; + endcc = cc; + first = FALSE; + continue; + } + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. 
*/ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) + { + ricount = 0; + bptr = prevcc; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + GETCHARBACK_INVALID(c, bptr, start_subject, break); + + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) + break; + + ricount++; + } + + if ((ricount & 1) != 0) + break; /* Grapheme break required */ + } + + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + prevcc = endcc; + endcc = cc; + } +while (cc < end_subject); + +return endcc; +} + +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function must be kept in step (note by PH, June 2024). 
*/ + +static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR bptr; +uint32_t c; +BOOL was_ep_ZWJ = FALSE; + +/* Patch by PH */ +/* GETCHARINC(c, cc); */ +c = *cc++; + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (c >= 0x110000) + return cc; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ +lgb = UCD_GRAPHBREAK(c); + +while (cc < end_subject) + { + c = *cc; +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x110000) + break; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + rgb = UCD_GRAPHBREAK(c); + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) + { + ricount = 0; + bptr = cc - 1; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + bptr--; + c = *bptr; +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x110000) + break; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; + + ricount++; + } + + if ((ricount & 1) != 0) + break; /* Grapheme break required */ + } + + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. 
*/ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + cc++; + } + +return cc; +} + +static void compile_clist(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) +{ +DEFINE_COMPILER; +const sljit_u32 *other_cases; +struct sljit_jump *jump; +sljit_u32 min = 0, max = READ_CHAR_MAX; +BOOL has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0; + +SLJIT_ASSERT(cc[1] == PT_CLIST); + +if (cc[0] == OP_PROP) + { + other_cases = PRIV(ucd_caseless_sets) + cc[2]; + + min = *other_cases++; + max = min; + + while (*other_cases != NOTACHAR) + { + if (*other_cases > max) max = *other_cases; + if (*other_cases < min) min = *other_cases; + other_cases++; + } + } + +other_cases = PRIV(ucd_caseless_sets) + cc[2]; +SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR); +/* The NOTACHAR is higher than any character. */ +SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); + +read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); + +/* At least two characters are required. + Otherwise this case would be handled by the normal code path. */ +/* NOTACHAR is the unsigned maximum. */ + +/* Optimizing character pairs, if their difference is power of 2. 
*/ +if (is_powerof2(other_cases[1] ^ other_cases[0])) + { + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[1] ^ other_cases[0])); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + other_cases += 2; + } +else if (is_powerof2(other_cases[2] ^ other_cases[1])) + { + SLJIT_ASSERT(other_cases[2] != NOTACHAR); + + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[2] ^ other_cases[1])); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)other_cases[0]); + + if (has_cmov) + SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2); + else + OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); + + other_cases += 3; + } +else + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + } + +while (*other_cases != NOTACHAR) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++)); + + if (has_cmov) + SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2); + else + OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); + } + +if (has_cmov) + jump = CMP(cc[0] == OP_PROP ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0); +else + jump = JUMP(cc[0] == OP_PROP ? 
SLJIT_ZERO : SLJIT_NOT_ZERO); + +add_jump(compiler, backtracks, jump); +} + +#endif /* SUPPORT_UNICODE */ + +static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr) +{ +DEFINE_COMPILER; +int length; +unsigned int c, oc, bit; +compare_context context; +struct sljit_jump *jump[3]; +jump_list *end_list; +#ifdef SUPPORT_UNICODE +PCRE2_UCHAR propdata[5]; +#endif /* SUPPORT_UNICODE */ + +switch(type) + { + case OP_NOT_DIGIT: + case OP_DIGIT: + /* Digits are usually 0-9, so it is worth to optimize them. */ + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_DIGIT); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_DIGIT); + /* Flip the starting bit in the negative case. */ + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit); + add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space); + add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? 
SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word); + add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_ANY: + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + end_list = NULL; + if (common->mode != PCRE2_JIT_PARTIAL_HARD) + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); + set_jumps(end_list, LABEL()); + JUMPHERE(jump[0]); + } + else + check_newlinechar(common, common->nltype, backtracks, TRUE); + return cc; + + case OP_ALLANY: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#ifdef SUPPORT_UNICODE + if (common->utf && common->invalid_utf) + { + read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); + return cc; + } +#endif /* SUPPORT_UNICODE */ + + skip_valid_char(common); + return cc; + + case OP_ANYBYTE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + return cc; + +#ifdef SUPPORT_UNICODE + case OP_NOTPROP: + case OP_PROP: + if 
(check_str_ptr) + detect_partial_match(common, backtracks); + if (cc[0] == PT_CLIST) + { + compile_clist(common, cc - 1, backtracks); + return cc + 2; + } + + propdata[0] = 0; + propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; + propdata[2] = cc[0]; + propdata[3] = cc[1]; + propdata[4] = XCL_END; + compile_xclass_matchingpath(common, propdata, backtracks, 0); + return cc + 2; +#endif + + case OP_ANYNL: + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0); + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + /* We don't need to handle soft partial matching case. */ + end_list = NULL; + if (common->mode != PCRE2_JIT_PARTIAL_HARD) + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + jump[1] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[0]); + check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); + set_jumps(end_list, LABEL()); + JUMPHERE(jump[1]); + return cc; + + case OP_NOT_HSPACE: + case OP_HSPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + if (type == OP_NOT_HSPACE) + read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0x9, 0x3000, NULL, 0); + + add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? 
SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + + case OP_NOT_VSPACE: + case OP_VSPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + if (type == OP_NOT_VSPACE) + read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0xa, 0x2029, NULL, 0); + + add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + +#ifdef SUPPORT_UNICODE + case OP_EXTUNI: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +#endif + + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); + + if (common->mode == PCRE2_JIT_PARTIAL_HARD) + { + jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0); + /* Since we successfully read a char above, partial matching must occur. 
*/ + check_partial(common, TRUE); + JUMPHERE(jump[0]); + } + return cc; +#endif + + case OP_CHAR: + case OP_CHARI: + length = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); +#endif + + if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE) + detect_partial_match(common, backtracks); + + if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) + { + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); + + context.length = IN_UCHARS(length); + context.sourcereg = -1; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + context.ucharptr = 0; +#endif + return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); + } + +#ifdef SUPPORT_UNICODE + if (common->utf) + { + GETCHAR(c, cc); + } + else +#endif + c = *cc; + + SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc)); + + if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + oc = char_othercase(common, c); + read_char(common, c < oc ? c : oc, c > oc ? 
c : oc, NULL, 0); + + SLJIT_ASSERT(!is_powerof2(c ^ oc)); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc); + SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + } + else + { + jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + JUMPHERE(jump[0]); + } + return cc + length; + + case OP_NOT: + case OP_NOTI: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + length = 1; +#ifdef SUPPORT_UNICODE + if (common->utf) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + c = *cc; + if (c < 128 && !common->invalid_utf) + { + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + if (type == OP_NOT || !char_has_othercase(common, cc)) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + else + { + /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); + } + /* Skip the variable-length character. */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(jump[0]); + return cc + 1; + } + else +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + { + GETCHARLEN(c, cc, length); + } + } + else +#endif /* SUPPORT_UNICODE */ + c = *cc; + + if (type == OP_NOT || !char_has_othercase(common, cc)) + { + read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + } + else + { + oc = char_othercase(common, c); + read_char(common, c < oc ? c : oc, c > oc ? 
c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR); + bit = c ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + } + else + { + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + } + } + return cc + length; + + case OP_CLASS: + case OP_NCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; + if (type == OP_NCLASS) + read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0, bit, NULL, 0); +#else + if (type == OP_NCLASS) + read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0, 255, NULL, 0); +#endif + + if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) + return cc + 32 / sizeof(PCRE2_UCHAR); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + jump[0] = NULL; + if (common->utf) + { + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); + if (type == OP_CLASS) + { + add_jump(compiler, backtracks, jump[0]); + jump[0] = NULL; + } + } +#elif PCRE2_CODE_UNIT_WIDTH != 8 + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + if (type == OP_CLASS) + { + add_jump(compiler, backtracks, jump[0]); + jump[0] = NULL; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ + + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + if (jump[0] != 
NULL) + JUMPHERE(jump[0]); +#endif + return cc + 32 / sizeof(PCRE2_UCHAR); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + case OP_XCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); + compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks, 0); + return cc + GET(cc, 0) - 1; + + case OP_ECLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); + return compile_eclass_matchingpath(common, cc, backtracks); +#endif + } +SLJIT_UNREACHABLE(); +return cc; +} + +static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks) +{ +/* This function consumes at least one input character. */ +/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ +DEFINE_COMPILER; +PCRE2_SPTR ccbegin = cc; +compare_context context; +int size; + +context.length = 0; +do + { + if (cc >= ccend) + break; + + if (*cc == OP_CHAR) + { + size = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); +#endif + } + else if (*cc == OP_CHARI) + { + size = 1; +#ifdef SUPPORT_UNICODE + if (common->utf) + { + if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) + size = 0; + else if (HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); + } + else +#endif + if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) + size = 0; + } + else + size = 0; + + cc += 1 + size; + context.length += IN_UCHARS(size); + } +while (size > 0 && context.length <= 128); + +cc = ccbegin; +if (context.length > 0) + { + /* We have a fixed-length byte sequence. 
*/ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); + + context.sourcereg = -1; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + context.ucharptr = 0; +#endif + do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); + return cc; + } + +/* A non-fixed length character will be checked if length == 0. */ +return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); +} + + diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 92f4fb8..81b91a8 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -82,7 +82,7 @@ pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); allocator->free(ptr, allocator->memory_data); } -#include "sljit/sljitLir.c" +#include "../deps/sljit/sljit_src/sljitLir.c" #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED #error Unsupported architecture @@ -282,14 +282,14 @@ typedef struct bracket_backtrack { struct sljit_label *zero_matchingpath; /* Contains the branches of a failed condition. */ union { - /* Both for OP_COND, OP_SCOND. */ - jump_list *condfailed; + /* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */ + jump_list *no_capture; assert_backtrack *assert; /* For OP_ONCE. Less than 0 if not needed. */ int framesize; - /* For brackets with >3 alternatives. */ - struct sljit_jump *matching_mov_addr; } u; + /* For brackets with >3 alternatives. */ + struct sljit_jump *matching_mov_addr; /* Points to our private memory word on the stack. */ int private_data_ptr; } bracket_backtrack; @@ -313,14 +313,12 @@ typedef struct char_iterator_backtrack { backtrack_common common; /* Next iteration. */ struct sljit_label *matchingpath; - union { - jump_list *backtracks; - struct { - unsigned int othercasebit; - PCRE2_UCHAR chr; - BOOL enabled; - } charpos; - } u; + /* Creating a range based on the next character. 
*/ + struct { + unsigned int othercasebit; + PCRE2_UCHAR chr; + BOOL charpos_enabled; + } charpos; } char_iterator_backtrack; typedef struct ref_iterator_backtrack { @@ -408,6 +406,10 @@ typedef struct compiler_common { then_trap_backtrack *then_trap; /* Starting offset of private data for capturing brackets. */ sljit_s32 cbra_ptr; +#if defined SLJIT_DEBUG && SLJIT_DEBUG + /* End offset of locals for assertions. */ + sljit_s32 locals_size; +#endif /* Output vector starting point. Must be divisible by 2. */ sljit_s32 ovector_start; /* Points to the starting character of the current match. */ @@ -429,6 +431,11 @@ typedef struct compiler_common { Each item must have a previous offset and type (see control_types) values. See do_search_mark. */ sljit_s32 control_head_ptr; + /* The offset of the saved STR_END in the outermost + scan substring block. Since scan substring restores + STR_END after a match, it is enough to restore + STR_END inside a scan substring block. */ + sljit_s32 restore_end_ptr; /* Points to the last matched capture block index. */ sljit_s32 capture_last_ptr; /* Fast forward skipping byte code pointer. */ @@ -513,7 +520,6 @@ typedef struct compiler_common { BOOL invalid_utf; BOOL ucp; /* Points to saving area for iref. */ - sljit_s32 iref_ptr; jump_list *getucd; jump_list *getucdtype; #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -603,14 +609,14 @@ typedef struct compare_context { #endif /* Local space layout. */ -/* These two locals can be used by the current opcode. */ -#define LOCALS0 (0 * sizeof(sljit_sw)) -#define LOCALS1 (1 * sizeof(sljit_sw)) -/* Two local variables for possessive quantifiers (char1 cannot use them). */ -#define POSSESSIVE0 (2 * sizeof(sljit_sw)) -#define POSSESSIVE1 (3 * sizeof(sljit_sw)) /* Max limit of recursions. */ -#define LIMIT_MATCH (4 * sizeof(sljit_sw)) +#define LIMIT_MATCH (0 * sizeof(sljit_sw)) +/* Local variables. Their number is computed by check_opcode_types. 
*/ +#define LOCAL0 (1 * sizeof(sljit_sw)) +#define LOCAL1 (2 * sizeof(sljit_sw)) +#define LOCAL2 (3 * sizeof(sljit_sw)) +#define LOCAL3 (4 * sizeof(sljit_sw)) +#define LOCAL4 (5 * sizeof(sljit_sw)) /* The output vector is stored on the stack, and contains pointers to characters. The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is @@ -667,7 +673,7 @@ the start pointers when the end of the capturing group has not yet reached. */ #define GET_LOCAL_BASE(dst, dstw, offset) \ sljit_get_local_base(compiler, (dst), (dstw), (offset)) -#define READ_CHAR_MAX 0x7fffffff +#define READ_CHAR_MAX ((sljit_u32)0xffffffff) #define INVALID_UTF_CHAR -1 #define UNASSIGNED_UTF_CHAR 888 @@ -862,7 +868,7 @@ the start pointers when the end of the capturing group has not yet reached. */ static PCRE2_SPTR bracketend(PCRE2_SPTR cc) { -SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do cc += GET(cc, 1); while (*cc == OP_ALT); SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); cc += 1 + LINK_SIZE; @@ -872,7 +878,7 @@ return cc; static int no_alternatives(PCRE2_SPTR cc) { int count = 0; -SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do { cc += GET(cc, 1); @@ -975,6 +981,7 @@ switch(*cc) case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -1097,7 +1104,9 @@ switch(*cc) return cc + GET(cc, 1 + 2*LINK_SIZE); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_ECLASS: case OP_XCLASS: + SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order); return cc + GET(cc, 1); 
#endif @@ -1114,12 +1123,36 @@ switch(*cc) } } +static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size) +{ +/* Depends on do_casefulcmp(), do_caselesscmp(), and compile_ref_matchingpath() */ +int locals_size = 2 * SSIZE_OF(sw); +SLJIT_UNUSED_ARG(common); + +#ifdef SUPPORT_UNICODE +if ((*cc == OP_REFI || *cc == OP_DNREFI) && (common->utf || common->ucp)) + locals_size = 3 * SSIZE_OF(sw); +#endif + +cc += PRIV(OP_lengths)[*cc]; +/* Although do_casefulcmp() uses only one local, the allocate_stack() +calls during the repeat destroys LOCAL1 variables. */ +if (*cc >= OP_CRSTAR && *cc <= OP_CRPOSRANGE) + locals_size += 2 * SSIZE_OF(sw); + +return (current_locals_size >= locals_size) ? current_locals_size : locals_size; +} + static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend) { int count; PCRE2_SPTR slot; PCRE2_SPTR assert_back_end = cc - 1; PCRE2_SPTR assert_na_end = cc - 1; +sljit_s32 locals_size = 2 * SSIZE_OF(sw); +BOOL set_recursive_head = FALSE; +BOOL set_capture_last = FALSE; +BOOL set_mark = FALSE; /* Calculate important variables (like stack size) and checks whether all opcodes are supported. 
*/ while (cc < ccend) @@ -1132,22 +1165,41 @@ while (cc < ccend) cc += 1; break; + case OP_TYPEUPTO: + case OP_TYPEEXACT: + if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += (2 + IMM2_SIZE) - 1; + break; + + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + if (cc[1] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += 2 - 1; + break; + + case OP_TYPEPOSUPTO: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); +#endif + if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += (2 + IMM2_SIZE) - 1; + break; + case OP_REFI: -#ifdef SUPPORT_UNICODE - if (common->iref_ptr == 0) - { - common->iref_ptr = common->ovector_start; - common->ovector_start += 3 * sizeof(sljit_sw); - } -#endif /* SUPPORT_UNICODE */ - /* Fall through. */ case OP_REF: + locals_size = ref_update_local_size(common, cc, locals_size); common->optimized_cbracket[GET2(cc, 1)] = 0; - cc += 1 + IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; break; case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: slot = bracketend(cc); if (slot > assert_na_end) assert_na_end = slot; @@ -1174,8 +1226,10 @@ while (cc < ccend) cc += 1 + IMM2_SIZE; break; - case OP_DNREF: case OP_DNREFI: + case OP_DNREF: + locals_size = ref_update_local_size(common, cc, locals_size); + /* Fall through */ case OP_DNCREF: count = GET2(cc, 1 + IMM2_SIZE); slot = common->name_table + GET2(cc, 1) * common->name_entry_size; @@ -1184,26 +1238,18 @@ while (cc < ccend) common->optimized_cbracket[GET2(slot, 0)] = 0; slot += common->name_entry_size; } - cc += 1 + 2 * IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; break; case OP_RECURSE: /* Set its value only once. 
*/ - if (common->recursive_head_ptr == 0) - { - common->recursive_head_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } + set_recursive_head = TRUE; cc += 1 + LINK_SIZE; break; case OP_CALLOUT: case OP_CALLOUT_STR: - if (common->capture_last_ptr == 0) - { - common->capture_last_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } + set_capture_last = TRUE; cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); break; @@ -1221,15 +1267,8 @@ while (cc < ccend) case OP_COMMIT_ARG: case OP_PRUNE_ARG: - if (cc < assert_na_end) - return FALSE; - /* Fall through */ case OP_MARK: - if (common->mark_ptr == 0) - { - common->mark_ptr = common->ovector_start; - common->ovector_start += sizeof(sljit_sw); - } + set_mark = TRUE; cc += 1 + 2 + cc[1]; break; @@ -1242,8 +1281,6 @@ while (cc < ccend) case OP_SKIP: if (cc < assert_back_end) common->has_skip_in_assert_back = TRUE; - if (cc < assert_na_end) - return FALSE; cc += 1; break; @@ -1252,19 +1289,31 @@ while (cc < ccend) common->has_skip_arg = TRUE; if (cc < assert_back_end) common->has_skip_in_assert_back = TRUE; - if (cc < assert_na_end) - return FALSE; cc += 1 + 2 + cc[1]; break; - case OP_PRUNE: - case OP_COMMIT: case OP_ASSERT_ACCEPT: if (cc < assert_na_end) return FALSE; cc++; break; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + case OP_CRPOSRANGE: + /* The second value can be 0 for infinite repeats. 
*/ + if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); + cc += 1 + 2 * IMM2_SIZE; + break; + + case OP_POSUPTO: + case OP_POSUPTOI: + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + if (common->utf && locals_size <= 3 * SSIZE_OF(sw)) + locals_size = 3 * SSIZE_OF(sw); +#endif + /* Fall through */ default: cc = next_opcode(common, cc); if (cc == NULL) @@ -1272,6 +1321,36 @@ while (cc < ccend) break; } } + +SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0); +#if defined SLJIT_DEBUG && SLJIT_DEBUG +common->locals_size = locals_size; +#endif + +if (locals_size > 0) + common->ovector_start += locals_size; + +if (set_mark) + { + SLJIT_ASSERT(common->mark_ptr == 0); + common->mark_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + +if (set_recursive_head) + { + SLJIT_ASSERT(common->recursive_head_ptr == 0); + common->recursive_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + +if (set_capture_last) + { + SLJIT_ASSERT(common->capture_last_ptr == 0); + common->capture_last_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + return TRUE; } @@ -1512,8 +1591,9 @@ do case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: accelerated_start = cc; - cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)))); + cc += (*cc >= OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))); #else accelerated_start = cc; cc += (1 + (32 / sizeof(PCRE2_UCHAR))); @@ -1687,7 +1767,7 @@ switch(*cc) if (max == 0) return (*cc == OP_CRRANGE) ? 2 : 1; max -= min; - if (max > 2) + if (max > (*cc == OP_CRRANGE ? 
0 : 1)) max = 2; return max; @@ -1905,6 +1985,12 @@ while (cc < ccend) bracketlen = 1 + LINK_SIZE; break; + case OP_ASSERT_SCS: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += 2 * sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE; + break; + case OP_CBRAPOS: case OP_SCBRAPOS: common->private_data_ptrs[cc - common->start] = private_data_ptr; @@ -1962,13 +2048,13 @@ while (cc < ccend) CASE_ITERATOR_TYPE_PRIVATE_DATA_2A size = 1; - if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) + if (cc[1] != OP_EXTUNI) space = 2; break; case OP_TYPEUPTO: size = 1 + IMM2_SIZE; - if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) + if (cc[1 + IMM2_SIZE] != OP_EXTUNI) space = 2; break; @@ -1985,6 +2071,7 @@ while (cc < ccend) #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: size = GET(cc, 1); space = get_class_iterator_size(cc + size); break; @@ -2200,6 +2287,7 @@ while (cc < ccend) case OP_CLASS: case OP_NCLASS: case OP_XCLASS: + case OP_ECLASS: case OP_CALLOUT: case OP_CALLOUT_STR: @@ -2242,6 +2330,7 @@ if (ccend == NULL) cc = next_opcode(common, cc); } +/* The data is restored by do_revertframes(). */ SLJIT_ASSERT(cc != NULL); while (cc < ccend) switch(*cc) @@ -2516,6 +2605,13 @@ while (cc < ccend) cc += 1 + LINK_SIZE; break; + case OP_ASSERT_SCS: + SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length += 2; + cc += 1 + LINK_SIZE; + break; + case OP_CBRA: case OP_SCBRA: offset = GET2(cc, 1 + LINK_SIZE); @@ -2623,7 +2719,8 @@ while (cc < ccend) case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: - size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); + case OP_ECLASS: + size = (*cc >= OP_XCLASS) ? 
GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); #else size = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif @@ -2865,6 +2962,14 @@ while (cc < ccend) cc += 1 + LINK_SIZE; break; + case OP_ASSERT_SCS: + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 2; + cc += 1 + LINK_SIZE; + break; + case OP_CBRA: case OP_SCBRA: offset = GET2(cc, 1 + LINK_SIZE); @@ -3005,7 +3110,8 @@ while (cc < ccend) case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: - i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); + case OP_ECLASS: + i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); #else i = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif @@ -3140,50 +3246,66 @@ static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_S PCRE2_SPTR end = bracketend(cc); BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; -/* Assert captures then. */ -if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) +/* Assert captures *THEN verb even if it has no alternatives. */ +if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) current_offset = NULL; -/* Conditional block does not. */ -if (*cc == OP_COND || *cc == OP_SCOND) +else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS) + has_alternatives = TRUE; +/* Conditional block does never capture. 
*/ +else if (*cc == OP_COND || *cc == OP_SCOND) has_alternatives = FALSE; cc = next_opcode(common, cc); if (has_alternatives) { - if (*cc == OP_REVERSE) - cc += 1 + IMM2_SIZE; - else if (*cc == OP_VREVERSE) - cc += 1 + 2 * IMM2_SIZE; + switch (*cc) + { + case OP_REVERSE: + case OP_CREF: + cc += 1 + IMM2_SIZE; + break; + case OP_VREVERSE: + case OP_DNCREF: + cc += 1 + 2 * IMM2_SIZE; + break; + } current_offset = common->then_offsets + (cc - common->start); } while (cc < end) { - if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) - cc = set_then_offsets(common, cc, current_offset); - else + if (*cc >= OP_ASSERT && *cc <= OP_SCOND) { - if (*cc == OP_ALT && has_alternatives) - { - cc += 1 + LINK_SIZE; + cc = set_then_offsets(common, cc, current_offset); + continue; + } - if (*cc == OP_REVERSE) - cc += 1 + IMM2_SIZE; - else if (*cc == OP_VREVERSE) - cc += 1 + 2 * IMM2_SIZE; + if (*cc == OP_ALT && has_alternatives) + { + cc += 1 + LINK_SIZE; - current_offset = common->then_offsets + (cc - common->start); - continue; - } + if (*cc == OP_REVERSE) + cc += 1 + IMM2_SIZE; + else if (*cc == OP_VREVERSE) + cc += 1 + 2 * IMM2_SIZE; - if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) - *current_offset = 1; - cc = next_opcode(common, cc); + current_offset = common->then_offsets + (cc - common->start); + continue; } + + if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) + *current_offset = 1; + cc = next_opcode(common, cc); } +cc = end - 1 - LINK_SIZE; + +/* Ignore repeats. 
*/ +if (*cc == OP_KET && PRIVATE_DATA(cc) != 0) + end += PRIVATE_DATA(cc + 1); + return end; } @@ -3269,8 +3391,12 @@ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); +#if defined SLJIT_DEBUG && SLJIT_DEBUG +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +/* These two are also used by the stackalloc calls. */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0); +#endif #endif add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0)); } @@ -5589,11 +5715,38 @@ if (last) chars->last_count++; } -static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count) +/* Value can be increased if needed. Patterns +such as /(a|){33}b/ can exhaust the stack. + +Note: /(a|){29}b/ already stops scan_prefix() +because it reaches the maximum step_count. */ +#define SCAN_PREFIX_STACK_END 32 + +/* +Scan prefix stores the prefix string in the chars array. +The elements of the chars array is either small character +sets or "any" (count is set to 255). + +Examples (the chars array is represented by a simple regex): + +/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3) +/a[a-z]b+c/ prefix: a.b (length: 3) +/ab?cd/ prefix: a[bc][cd] (length: 3) +/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2) + +The length is returned by scan_prefix(). The length is +less than or equal than the minimum length of the pattern. +*/ + +static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars) { -/* Recursive function, which scans prefix literals. 
*/ +fast_forward_char_data *chars_start = chars; +fast_forward_char_data *chars_end = chars + MAX_N_CHARS; +PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END]; +fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END]; +sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END]; BOOL last, any, class, caseless; -int len, repeat, len_save, consumed = 0; +int stack_ptr, step_count, repeat, len, len_save; sljit_u32 chr; /* Any unicode character. */ sljit_u8 *bytes, *bytes_end, byte; PCRE2_SPTR alternative, cc_save, oc; @@ -5606,11 +5759,44 @@ PCRE2_UCHAR othercase[1]; #endif repeat = 1; +stack_ptr = 0; +step_count = 10000; while (TRUE) { - if (*rec_count == 0) + if (--step_count == 0) return 0; - (*rec_count)--; + + SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS); + + if (chars >= chars_end) + { + if (stack_ptr == 0) + return (int)(chars_end - chars_start); + + --stack_ptr; + cc = cc_stack[stack_ptr]; + chars = chars_stack[stack_ptr]; + + if (chars >= chars_end) + continue; + + if (next_alternative_stack[stack_ptr] != 0) + { + /* When an alternative is processed, the + next alternative is pushed onto the stack. 
*/ + SLJIT_ASSERT(*cc == OP_ALT); + alternative = cc + GET(cc, 1); + if (*alternative == OP_ALT) + { + SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END); + SLJIT_ASSERT(chars_stack[stack_ptr] == chars); + SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1); + cc_stack[stack_ptr] = alternative; + stack_ptr++; + } + cc += 1 + LINK_SIZE; + } + } last = TRUE; any = FALSE; @@ -5650,6 +5836,7 @@ while (TRUE) case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: cc = bracketend(cc); continue; @@ -5686,9 +5873,17 @@ while (TRUE) #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); #endif - max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count); - if (max_chars == 0) - return consumed; + if (stack_ptr >= SCAN_PREFIX_STACK_END) + { + chars_end = chars; + continue; + } + + cc_stack[stack_ptr] = cc + len; + chars_stack[stack_ptr] = chars; + next_alternative_stack[stack_ptr] = 0; + stack_ptr++; + last = FALSE; break; @@ -5706,12 +5901,18 @@ while (TRUE) case OP_CBRA: case OP_CBRAPOS: alternative = cc + GET(cc, 1); - while (*alternative == OP_ALT) + if (*alternative == OP_ALT) { - max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count); - if (max_chars == 0) - return consumed; - alternative += GET(alternative, 1); + if (stack_ptr >= SCAN_PREFIX_STACK_END) + { + chars_end = chars; + continue; + } + + cc_stack[stack_ptr] = alternative; + chars_stack[stack_ptr] = chars; + next_alternative_stack[stack_ptr] = 1; + stack_ptr++; } if (*cc == OP_CBRA || *cc == OP_CBRAPOS) @@ -5722,22 +5923,34 @@ while (TRUE) case OP_CLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif class = TRUE; break; case OP_NCLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + 
chars_end = chars; + continue; + } #endif class = TRUE; break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; cc += GET(cc, 1); @@ -5747,7 +5960,10 @@ while (TRUE) case OP_DIGIT: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5756,7 +5972,10 @@ while (TRUE) case OP_WHITESPACE: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5765,7 +5984,10 @@ while (TRUE) case OP_WORDCHAR: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) - return consumed; + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5781,7 +6003,11 @@ while (TRUE) case OP_ANY: case OP_ALLANY: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; cc++; @@ -5791,7 +6017,11 @@ while (TRUE) case OP_NOTPROP: case OP_PROP: #if PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; cc += 1 + 2; @@ -5806,7 +6036,11 @@ while (TRUE) case OP_NOTEXACT: case OP_NOTEXACTI: #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) return consumed; + if (common->utf) + { + chars_end = chars; + continue; + } #endif any = TRUE; repeat = GET2(cc, 1); @@ 
-5814,21 +6048,20 @@ while (TRUE) break; default: - return consumed; + chars_end = chars; + continue; } + SLJIT_ASSERT(chars < chars_end); + if (any) { do { chars->count = 255; - - consumed++; - if (--max_chars == 0) - return consumed; chars++; } - while (--repeat > 0); + while (--repeat > 0 && chars < chars_end); repeat = 1; continue; @@ -5839,17 +6072,27 @@ while (TRUE) bytes = (sljit_u8*) (cc + 1); cc += 1 + 32 / sizeof(PCRE2_UCHAR); + SLJIT_ASSERT(last == TRUE && repeat == 1); switch (*cc) { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPOSSTAR: case OP_CRQUERY: case OP_CRMINQUERY: case OP_CRPOSQUERY: - max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count); - if (max_chars == 0) - return consumed; + last = FALSE; + /* Fall through */ + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + if (stack_ptr >= SCAN_PREFIX_STACK_END) + { + chars_end = chars; + continue; + } + + cc_stack[stack_ptr] = ++cc; + chars_stack[stack_ptr] = chars; + next_alternative_stack[stack_ptr] = 0; + stack_ptr++; break; default: @@ -5863,7 +6106,13 @@ while (TRUE) case OP_CRPOSRANGE: repeat = GET2(cc, 1); if (repeat <= 0) - return consumed; + { + chars_end = chars; + continue; + } + + last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE)); + cc += 1 + 2 * IMM2_SIZE; break; } @@ -5898,36 +6147,13 @@ while (TRUE) bytes = bytes_end - 32; } - consumed++; - if (--max_chars == 0) - return consumed; chars++; } - while (--repeat > 0); - - switch (*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPOSSTAR: - return consumed; - - case OP_CRQUERY: - case OP_CRMINQUERY: - case OP_CRPOSQUERY: - cc++; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - case OP_CRPOSRANGE: - if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE)) - return consumed; - cc += 1 + 2 * IMM2_SIZE; - break; - } + while (--repeat > 0 && chars < chars_end); repeat = 1; + if (last) + chars_end = chars; continue; } @@ -5943,7 +6169,10 @@ while (TRUE) { GETCHAR(chr, cc); if 
((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len) - return consumed; + { + chars_end = chars; + continue; + } } else #endif @@ -5974,7 +6203,6 @@ while (TRUE) do { len--; - consumed++; chr = *cc; add_prefix_char(*cc, chars, len == 0); @@ -5982,15 +6210,13 @@ while (TRUE) if (caseless) add_prefix_char(*oc, chars, len == 0); - if (--max_chars == 0) - return consumed; chars++; cc++; oc++; } - while (len > 0); + while (len > 0 && chars < chars_end); - if (--repeat == 0) + if (--repeat == 0 || chars >= chars_end) break; len = len_save; @@ -5999,7 +6225,7 @@ while (TRUE) repeat = 1; if (last) - return consumed; + chars_end = chars; } } @@ -6169,7 +6395,6 @@ int i, max, from; int range_right = -1, range_len; sljit_u8 *update_table = NULL; BOOL in_range; -sljit_u32 rec_count; for (i = 0; i < MAX_N_CHARS; i++) { @@ -6177,8 +6402,7 @@ for (i = 0; i < MAX_N_CHARS; i++) chars[i].last_count = 0; } -rec_count = 10000; -max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); +max = scan_prefix(common, common->start, chars); if (max < 1) return FALSE; @@ -6768,8 +6992,7 @@ jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */); OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); -OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); -OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); +OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0); if (HAS_VIRTUAL_REGISTERS) { OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); @@ -6811,7 +7034,8 @@ struct sljit_jump *jump; SLJIT_UNUSED_ARG(ucp); SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); /* Get type of the previous char, and put it to TMP3. 
*/ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); @@ -6880,7 +7104,7 @@ JUMPHERE(skipread); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); check_str_end(common, &skipread_list); -peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2); +peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2); /* Testing char type. This is a code duplication. */ #ifdef SUPPORT_UNICODE @@ -6919,7 +7143,7 @@ else } set_jumps(skipread_list, LABEL()); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); @@ -6928,15 +7152,15 @@ if (common->invalid_utf) { set_jumps(invalid_utf1, LABEL()); - peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL); + peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL); CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); set_jumps(invalid_utf2, LABEL()); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP1(SLJIT_MOV, TMP2, 0, TMP3, 0); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); } @@ -7317,7 +7541,9 @@ else char2_reg = RETURN_ADDR; } -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +/* Update ref_update_local_size() when this changes. 
*/ +SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); if (char1_reg == STR_END) @@ -7336,7 +7562,7 @@ if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); } else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { @@ -7351,7 +7577,7 @@ else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_ JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else @@ -7366,7 +7592,7 @@ else JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); } if (char1_reg == STR_END) @@ -7404,10 +7630,12 @@ if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 2; -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +/* Update ref_update_local_size() when this changes. 
*/ +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0); if (char2_reg == STACK_TOP) { @@ -7461,7 +7689,7 @@ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); if (opt_type == 2) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -7472,1929 +7700,253 @@ if (char2_reg == STACK_TOP) OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0); } -OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); } -static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc, - compare_context *context, jump_list **backtracks) +#include "pcre2_jit_char_inc.h" + +static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks) { DEFINE_COMPILER; -unsigned int othercasebit = 0; -PCRE2_SPTR othercasechar = NULL; -#ifdef SUPPORT_UNICODE -int utflength; -#endif +struct sljit_jump *jump[4]; -if (caseless && char_has_othercase(common, cc)) +switch(type) { - othercasebit = char_get_othercase_bit(common, cc); - SLJIT_ASSERT(othercasebit); - /* Extracting bit difference info. */ -#if PCRE2_CODE_UNIT_WIDTH == 8 - othercasechar = cc + (othercasebit >> 8); - othercasebit &= 0xff; -#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 - /* Note that this code only handles characters in the BMP. 
If there - ever are characters outside the BMP whose othercase differs in only one - bit from itself (there currently are none), this code will need to be - revised for PCRE2_CODE_UNIT_WIDTH == 32. */ - othercasechar = cc + (othercasebit >> 9); - if ((othercasebit & 0x100) != 0) - othercasebit = (othercasebit & 0xff) << 8; + case OP_SOD: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } else - othercasebit &= 0xff; -#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ - } + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + return cc; -if (context->sourcereg == -1) - { -#if PCRE2_CODE_UNIT_WIDTH == 8 -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - if (context->length >= 4) - OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else if (context->length >= 2) - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else -#endif - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - if (context->length >= 4) - OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); - else -#endif - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#elif PCRE2_CODE_UNIT_WIDTH == 32 - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ - context->sourcereg = TMP2; - } - -#ifdef SUPPORT_UNICODE -utflength = 1; -if (common->utf && HAS_EXTRALEN(*cc)) - utflength += GET_EXTRALEN(*cc); - -do - { -#endif - - context->length -= IN_UCHARS(1); -#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) - - /* Unaligned read is supported. 
*/ - if (othercasebit != 0 && othercasechar == cc) - { - context->c.asuchars[context->ucharptr] = *cc | othercasebit; - context->oc.asuchars[context->ucharptr] = othercasebit; - } - else - { - context->c.asuchars[context->ucharptr] = *cc; - context->oc.asuchars[context->ucharptr] = 0; - } - context->ucharptr++; - -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) -#else - if (context->ucharptr >= 2 || context->length == 0) -#endif - { - if (context->length >= 4) - OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); - else if (context->length >= 2) - OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#if PCRE2_CODE_UNIT_WIDTH == 8 - else if (context->length >= 1) - OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; - - switch(context->ucharptr) - { - case 4 / sizeof(PCRE2_UCHAR): - if (context->oc.asint != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); - break; - - case 2 / sizeof(PCRE2_UCHAR): - if (context->oc.asushort != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); - break; - -#if PCRE2_CODE_UNIT_WIDTH == 8 - case 1: - if (context->oc.asbyte != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); - break; -#endif - - default: - SLJIT_UNREACHABLE(); - 
break; - } - context->ucharptr = 0; - } - -#else - - /* Unaligned read is unsupported or in 32 bit mode. */ - if (context->length >= 1) - OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); - - context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; - - if (othercasebit != 0 && othercasechar == cc) - { - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); - } - else - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); - -#endif - - cc++; -#ifdef SUPPORT_UNICODE - utflength--; - } -while (utflength > 0); -#endif - -return cc; -} - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - -#define SET_CHAR_OFFSET(value) \ - if ((value) != charoffset) \ - { \ - if ((value) < charoffset) \ - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ - else \ - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ - } \ - charoffset = (value); - -static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); - -#ifdef SUPPORT_UNICODE -#define XCLASS_SAVE_CHAR 0x001 -#define XCLASS_CHAR_SAVED 0x002 -#define XCLASS_HAS_TYPE 0x004 -#define XCLASS_HAS_SCRIPT 0x008 -#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 -#define XCLASS_HAS_BOOL 0x020 -#define XCLASS_HAS_BIDICL 0x040 -#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) -#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 -#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 -#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200 -#endif /* SUPPORT_UNICODE */ - -static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) -{ -DEFINE_COMPILER; -jump_list 
*found = NULL; -jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; -sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; -struct sljit_jump *jump = NULL; -PCRE2_SPTR ccbegin; -int compares, invertcmp, numberofcmps; -#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) -BOOL utf = common->utf; -#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ - -#ifdef SUPPORT_UNICODE -sljit_u32 unicode_status = 0; -sljit_u32 category_list = 0; -sljit_u32 items; -int typereg = TMP1; -const sljit_u32 *other_cases; -#endif /* SUPPORT_UNICODE */ - -/* Scanning the necessary info. */ -cc++; -ccbegin = cc; -compares = 0; - -if (cc[-1] & XCL_MAP) - { - min = 0; - cc += 32 / sizeof(PCRE2_UCHAR); - } - -while (*cc != XCL_END) - { - compares++; - - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - if (c > max) max = c; - if (c < min) min = c; -#ifdef SUPPORT_UNICODE - unicode_status |= XCLASS_SAVE_CHAR; -#endif /* SUPPORT_UNICODE */ - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - if (c < min) min = c; - GETCHARINCTEST(c, cc); - if (c > max) max = c; -#ifdef SUPPORT_UNICODE - unicode_status |= XCLASS_SAVE_CHAR; -#endif /* SUPPORT_UNICODE */ - } -#ifdef SUPPORT_UNICODE - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - - if (*cc == PT_CLIST && cc[-1] == XCL_PROP) - { - other_cases = PRIV(ucd_caseless_sets) + cc[1]; - while (*other_cases != NOTACHAR) - { - if (*other_cases > max) max = *other_cases; - if (*other_cases < min) min = *other_cases; - other_cases++; - } - } - else - { - max = READ_CHAR_MAX; - min = 0; - } - - items = 0; - - switch(*cc) - { - case PT_ANY: - /* Any either accepts everything or ignored. 
*/ - if (cc[-1] == XCL_PROP) - items = UCPCAT_ALL; - else - compares--; - break; - - case PT_LAMP: - items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); - break; - - case PT_GC: - items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]); - break; - - case PT_PC: - items = UCPCAT(cc[1]); - break; - - case PT_WORD: - items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N; - break; - - case PT_ALNUM: - items = UCPCAT_L | UCPCAT_N; - break; - - case PT_SCX: - unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION; - if (cc[-1] == XCL_NOTPROP) - { - unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP; - break; - } - compares++; - /* Fall through */ - - case PT_SC: - unicode_status |= XCLASS_HAS_SCRIPT; - break; - - case PT_SPACE: - case PT_PXSPACE: - case PT_PXGRAPH: - case PT_PXPRINT: - case PT_PXPUNCT: - unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE; - break; - - case PT_CLIST: - case PT_UCNC: - case PT_PXXDIGIT: - unicode_status |= XCLASS_SAVE_CHAR; - break; - - case PT_BOOL: - unicode_status |= XCLASS_HAS_BOOL; - break; - - case PT_BIDICL: - unicode_status |= XCLASS_HAS_BIDICL; - break; - - default: - SLJIT_UNREACHABLE(); - break; - } - - if (items > 0) - { - if (cc[-1] == XCL_NOTPROP) - items ^= UCPCAT_ALL; - category_list |= items; - unicode_status |= XCLASS_HAS_TYPE; - compares--; - } - - cc += 2; - } -#endif /* SUPPORT_UNICODE */ - } - -#ifdef SUPPORT_UNICODE -if (category_list == UCPCAT_ALL) - { - /* All characters are accepted, same as dotall. */ - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - if (list == backtracks) - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - return; - } - -if (compares == 0 && category_list == 0) - { - /* No characters are accepted, same as (*F) or dotall. 
*/ - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - if (list != backtracks) - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - return; - } -#else /* !SUPPORT_UNICODE */ -SLJIT_ASSERT(compares > 0); -#endif /* SUPPORT_UNICODE */ - -/* We are not necessary in utf mode even in 8 bit mode. */ -cc = ccbegin; -if ((cc[-1] & XCL_NOT) != 0) - read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); -else - { -#ifdef SUPPORT_UNICODE - read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0); -#else /* !SUPPORT_UNICODE */ - read_char(common, min, max, NULL, 0); -#endif /* SUPPORT_UNICODE */ - } - -if ((cc[-1] & XCL_HASPROP) == 0) - { - if ((cc[-1] & XCL_MAP) != 0) - { - jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found)) - { - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); - add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); - } - - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump); - - cc += 32 / sizeof(PCRE2_UCHAR); - } - else - { - OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); - add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? 
backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min)); - } - } -else if ((cc[-1] & XCL_MAP) != 0) - { - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); -#ifdef SUPPORT_UNICODE - unicode_status |= XCLASS_CHAR_SAVED; -#endif /* SUPPORT_UNICODE */ - if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) - { -#if PCRE2_CODE_UNIT_WIDTH == 8 - jump = NULL; - if (common->utf) -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); - add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); - -#if PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf) -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - JUMPHERE(jump); - } - - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - cc += 32 / sizeof(PCRE2_UCHAR); - } - -#ifdef SUPPORT_UNICODE -if (unicode_status & XCLASS_NEEDS_UCD) - { - if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR) - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); - -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (!common->utf) - { - jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); - JUMPHERE(jump); - } -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - - OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); - OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); 
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - - ccbegin = cc; - - if (category_list != 0) - compares++; - - if (unicode_status & XCLASS_HAS_BIDICL) - { - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT); - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - if (*cc == PT_BIDICL) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); - add_jump(compiler, compares > 0 ? list : backtracks, jump); - } - cc += 2; - } - } - - cc = ccbegin; - } - - if (unicode_status & XCLASS_HAS_BOOL) - { - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - if (*cc == PT_BOOL) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); - add_jump(compiler, compares > 0 ? 
list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); - } - cc += 2; - } - } - - cc = ccbegin; - } - - if (unicode_status & XCLASS_HAS_SCRIPT) - { - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - switch (*cc) - { - case PT_SCX: - if (cc[-1] == XCL_NOTPROP) - break; - /* Fall through */ - - case PT_SC: - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - - add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1])); - } - cc += 2; - } - } - - cc = ccbegin; - } - - if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION) - { - OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); - - if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) - { - if (unicode_status & XCLASS_HAS_TYPE) - { - if (unicode_status & XCLASS_SAVE_CHAR) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0); - unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0; - } - else - { - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); - unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR; - } - } - OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - } - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - 
cc++; - if (*cc == PT_SCX) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - - jump = NULL; - if (cc[-1] == XCL_NOTPROP) - { - jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]); - if (invertcmp) - { - add_jump(compiler, backtracks, jump); - jump = NULL; - } - invertcmp ^= 0x1; - } - - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); - add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); - - if (jump != NULL) - JUMPHERE(jump); - } - cc += 2; - } - } - - if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); - else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR) - OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); - cc = ccbegin; - } - - if (unicode_status & XCLASS_SAVE_CHAR) - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - - if (unicode_status & XCLASS_HAS_TYPE) - { - if (unicode_status & XCLASS_SAVE_CHAR) - typereg = RETURN_ADDR; - - OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0); - - if (category_list > 0) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list); - add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); - } - } - } -#endif /* SUPPORT_UNICODE */ - -/* Generating code. */ -charoffset = 0; -numberofcmps = 0; - -while (*cc != XCL_END) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - jump = NULL; - - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - - if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(numberofcmps == 0 ? 
SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - numberofcmps++; - } - else if (numberofcmps > 0) - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - numberofcmps = 0; - } - else - { - jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - numberofcmps = 0; - } - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - SET_CHAR_OFFSET(c); - GETCHARINCTEST(c, cc); - - if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) - { - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - numberofcmps++; - } - else if (numberofcmps > 0) - { - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - numberofcmps = 0; - } - else - { - jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); - numberofcmps = 0; - } - } -#ifdef SUPPORT_UNICODE - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - if (*cc == XCL_NOTPROP) - invertcmp ^= 0x1; - cc++; - switch(*cc) - { - case PT_ANY: - case PT_LAMP: - case PT_GC: - case PT_PC: - case PT_SC: - case PT_SCX: - case PT_BOOL: - case PT_BIDICL: - case PT_WORD: - case PT_ALNUM: - compares++; - /* Already handled. 
*/ - break; - - case PT_SPACE: - case PT_PXSPACE: - SET_CHAR_OFFSET(9); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_CLIST: - other_cases = PRIV(ucd_caseless_sets) + cc[1]; - - /* At least three characters are required. - Otherwise this case would be handled by the normal code path. */ - SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); - SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); - - /* Optimizing character pairs, if their difference is power of 2. 
*/ - if (is_powerof2(other_cases[1] ^ other_cases[0])) - { - if (charoffset == 0) - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - else - { - OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - } - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - other_cases += 2; - } - else if (is_powerof2(other_cases[2] ^ other_cases[1])) - { - if (charoffset == 0) - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); - else - { - OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); - } - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); - OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); - - other_cases += 3; - } - else - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - } - - while (*other_cases != NOTACHAR) - { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); - OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? 
SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); - } - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_UCNC: - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - SET_CHAR_OFFSET(0xa0); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - SET_CHAR_OFFSET(0); - OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_PXGRAPH: - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); - jump = JUMP(SLJIT_ZERO); - - c = charoffset; - /* In case of ucp_Cf, we overwrite the result. */ - SET_CHAR_OFFSET(0x2066); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - /* Restore charoffset. 
*/ - SET_CHAR_OFFSET(c); - - JUMPHERE(jump); - jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); - break; - - case PT_PXPRINT: - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); - jump = JUMP(SLJIT_ZERO); - - c = charoffset; - /* In case of ucp_Cf, we overwrite the result. */ - SET_CHAR_OFFSET(0x2066); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - - /* Restore charoffset. */ - SET_CHAR_OFFSET(c); - - JUMPHERE(jump); - jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); - break; - - case PT_PXPUNCT: - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - - SET_CHAR_OFFSET(0); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f); - OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); - - OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_PXXDIGIT: - SET_CHAR_OFFSET(CHAR_A); - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(CHAR_0); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(0xff10); - jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10); - - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - 
SET_CHAR_OFFSET(0xff21); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(0xff41); - OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - - SET_CHAR_OFFSET(0xff10); - - JUMPHERE(jump); - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - default: - SLJIT_UNREACHABLE(); - break; - } - cc += 2; - } -#endif /* SUPPORT_UNICODE */ - - if (jump != NULL) - add_jump(compiler, compares > 0 ? list : backtracks, jump); - } - -SLJIT_ASSERT(compares == 0); -if (found != NULL) - set_jumps(found, LABEL()); -} - -#undef SET_TYPE_OFFSET -#undef SET_CHAR_OFFSET - -#endif - -static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks) -{ -DEFINE_COMPILER; -struct sljit_jump *jump[4]; - -switch(type) - { - case OP_SOD: - if (HAS_VIRTUAL_REGISTERS) - { - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - } - else - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); - return cc; - - case OP_SOM: - if (HAS_VIRTUAL_REGISTERS) - { - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - } - else - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); - return cc; - - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - case OP_NOT_UCP_WORD_BOUNDARY: - case OP_UCP_WORD_BOUNDARY: - add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? 
&common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL)); -#ifdef SUPPORT_UNICODE - if (common->invalid_utf) - { - add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - return cc; - } -#endif /* SUPPORT_UNICODE */ - sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO)); - return cc; - - case OP_EODN: - /* Requires rather complex checks. */ - jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); - OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else if (common->nltype == NLTYPE_FIXED) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); - add_jump(compiler, 
backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); - } - else - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0); - jump[2] = JUMP(SLJIT_GREATER); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */); - /* Equal. */ - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - - JUMPHERE(jump[1]); - if (common->nltype == NLTYPE_ANYCRLF) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); - } - else - { - OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); - read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); - add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); - OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); - } - JUMPHERE(jump[2]); - JUMPHERE(jump[3]); - } - JUMPHERE(jump[0]); - if (common->mode != PCRE2_JIT_COMPLETE) - check_partial(common, TRUE); - return cc; - - case OP_EOD: - add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); - if (common->mode != PCRE2_JIT_COMPLETE) - check_partial(common, TRUE); - return cc; - - case OP_DOLL: - if (HAS_VIRTUAL_REGISTERS) - { - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - } - else - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, 
options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); - - if (!common->endonly) - compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); - else - { - add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); - } - return cc; - - case OP_DOLLM: - jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); - if (HAS_VIRTUAL_REGISTERS) - { - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - } - else - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); - check_partial(common, FALSE); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); - /* STR_PTR = STR_END - IN_UCHARS(1) */ - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - peek_char(common, common->nlmax, TMP3, 0, NULL); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - - case OP_CIRC: - if (HAS_VIRTUAL_REGISTERS) - { - 
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); - } - else - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); - } - return cc; - - case OP_CIRCM: - /* TMP2 might be used by peek_char_back. */ - if (HAS_VIRTUAL_REGISTERS) - { - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - } - else - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); - jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); - OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - } - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - if (!common->alt_circumflex) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - add_jump(compiler, 
backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - peek_char_back(common, common->nlmax, backtracks); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - } -SLJIT_UNREACHABLE(); -return cc; -} - -#ifdef SUPPORT_UNICODE - -#if PCRE2_CODE_UNIT_WIDTH != 32 - -/* The code in this function copies the logic of the interpreter function that -is defined in the pcre2_extuni.c source. If that code is updated, this -function, and those below it, must be kept in step (note by PH, June 2024). */ - -static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc) -{ -PCRE2_SPTR start_subject = args->begin; -PCRE2_SPTR end_subject = args->end; -int lgb, rgb, ricount; -PCRE2_SPTR prevcc, endcc, bptr; -BOOL first = TRUE; -BOOL was_ep_ZWJ = FALSE; -uint32_t c; - -prevcc = cc; -endcc = NULL; -do - { - GETCHARINC(c, cc); - rgb = UCD_GRAPHBREAK(c); - - if (first) - { - lgb = rgb; - endcc = cc; - first = FALSE; - continue; - } - - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) - break; - - /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was - preceded by Extended Pictographic. */ - - if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) - break; - - /* Not breaking between Regional Indicators is allowed only if there - are an even number of preceding RIs. 
*/ - - if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) - { - ricount = 0; - bptr = prevcc; - - /* bptr is pointing to the left-hand character */ - while (bptr > start_subject) - { - bptr--; - BACKCHAR(bptr); - GETCHAR(c, bptr); - - if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) - break; - - ricount++; - } - - if ((ricount & 1) != 0) break; /* Grapheme break required */ - } - - /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in - between; see next statement). */ - - was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); - - /* If Extend follows Extended_Pictographic, do not update lgb; this allows - any number of them before a following ZWJ. */ - - if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) - lgb = rgb; - - prevcc = endcc; - endcc = cc; - } -while (cc < end_subject); - -return endcc; -} - -#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ - -/* The code in this function copies the logic of the interpreter function that -is defined in the pcre2_extuni.c source. If that code is updated, this -function, and the one below it, must be kept in step (note by PH, June 2024). */ - -static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc) -{ -PCRE2_SPTR start_subject = args->begin; -PCRE2_SPTR end_subject = args->end; -int lgb, rgb, ricount; -PCRE2_SPTR prevcc, endcc, bptr; -BOOL first = TRUE; -BOOL was_ep_ZWJ = FALSE; -uint32_t c; - -prevcc = cc; -endcc = NULL; -do - { - GETCHARINC_INVALID(c, cc, end_subject, break); - rgb = UCD_GRAPHBREAK(c); - - if (first) - { - lgb = rgb; - endcc = cc; - first = FALSE; - continue; - } - - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) - break; - - /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was - preceded by Extended Pictographic. 
*/ - - if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) - break; - - /* Not breaking between Regional Indicators is allowed only if there - are an even number of preceding RIs. */ - - if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) - { - ricount = 0; - bptr = prevcc; - - /* bptr is pointing to the left-hand character */ - while (bptr > start_subject) - { - GETCHARBACK_INVALID(c, bptr, start_subject, break); - - if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) - break; - - ricount++; - } - - if ((ricount & 1) != 0) - break; /* Grapheme break required */ - } - - /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in - between; see next statement). */ - - was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); - - /* If Extend follows Extended_Pictographic, do not update lgb; this allows - any number of them before a following ZWJ. */ - - if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) - lgb = rgb; - - prevcc = endcc; - endcc = cc; - } -while (cc < end_subject); - -return endcc; -} - -/* The code in this function copies the logic of the interpreter function that -is defined in the pcre2_extuni.c source. If that code is updated, this -function must be kept in step (note by PH, June 2024). 
*/ - -static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc) -{ -PCRE2_SPTR start_subject = args->begin; -PCRE2_SPTR end_subject = args->end; -int lgb, rgb, ricount; -PCRE2_SPTR bptr; -uint32_t c; -BOOL was_ep_ZWJ = FALSE; - -/* Patch by PH */ -/* GETCHARINC(c, cc); */ -c = *cc++; - -#if PCRE2_CODE_UNIT_WIDTH == 32 -if (c >= 0x110000) - return cc; -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ -lgb = UCD_GRAPHBREAK(c); - -while (cc < end_subject) - { - c = *cc; -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x110000) - break; -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - rgb = UCD_GRAPHBREAK(c); - - if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) - break; - - /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was - preceded by Extended Pictographic. */ - - if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) - break; - - /* Not breaking between Regional Indicators is allowed only if there - are an even number of preceding RIs. */ - - if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) - { - ricount = 0; - bptr = cc - 1; - - /* bptr is pointing to the left-hand character */ - while (bptr > start_subject) - { - bptr--; - c = *bptr; -#if PCRE2_CODE_UNIT_WIDTH == 32 - if (c >= 0x110000) - break; -#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - - if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; - - ricount++; - } - - if ((ricount & 1) != 0) - break; /* Grapheme break required */ - } - - /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in - between; see next statement). */ - - was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); - - /* If Extend follows Extended_Pictographic, do not update lgb; this allows - any number of them before a following ZWJ. 
*/ - - if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) - lgb = rgb; - - cc++; - } - -return cc; -} - -#endif /* SUPPORT_UNICODE */ - -static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr) -{ -DEFINE_COMPILER; -int length; -unsigned int c, oc, bit; -compare_context context; -struct sljit_jump *jump[3]; -jump_list *end_list; -#ifdef SUPPORT_UNICODE -PCRE2_UCHAR propdata[5]; -#endif /* SUPPORT_UNICODE */ - -switch(type) - { - case OP_NOT_DIGIT: - case OP_DIGIT: - /* Digits are usually 0-9, so it is worth to optimize them. */ - if (check_str_ptr) - detect_partial_match(common, backtracks); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE)) - read_char7_type(common, backtracks, type == OP_NOT_DIGIT); - else -#endif - read_char8_type(common, backtracks, type == OP_NOT_DIGIT); - /* Flip the starting bit in the negative case. */ - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit); - add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); - return cc; - - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - if (check_str_ptr) - detect_partial_match(common, backtracks); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE)) - read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE); - else -#endif - read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space); - add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? 
SLJIT_ZERO : SLJIT_NOT_ZERO)); - return cc; - - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - if (check_str_ptr) - detect_partial_match(common, backtracks); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE)) - read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR); - else -#endif - read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word); - add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); - return cc; - - case OP_ANY: - if (check_str_ptr) - detect_partial_match(common, backtracks); - read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) + case OP_SOM: + if (HAS_VIRTUAL_REGISTERS) { - jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - end_list = NULL; - if (common->mode != PCRE2_JIT_PARTIAL_HARD) - add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - else - check_str_end(common, &end_list); - - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); - set_jumps(end_list, LABEL()); - JUMPHERE(jump[0]); + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); } else - check_newlinechar(common, common->nltype, backtracks, TRUE); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; - case OP_ALLANY: - if (check_str_ptr) - detect_partial_match(common, backtracks); + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + add_jump(compiler, (type == 
OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL)); #ifdef SUPPORT_UNICODE - if (common->utf && common->invalid_utf) + if (common->invalid_utf) { - read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); + add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0)); return cc; } #endif /* SUPPORT_UNICODE */ - - skip_valid_char(common); - return cc; - - case OP_ANYBYTE: - if (check_str_ptr) - detect_partial_match(common, backtracks); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - return cc; - -#ifdef SUPPORT_UNICODE - case OP_NOTPROP: - case OP_PROP: - propdata[0] = XCL_HASPROP; - propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; - propdata[2] = cc[0]; - propdata[3] = cc[1]; - propdata[4] = XCL_END; - if (check_str_ptr) - detect_partial_match(common, backtracks); - compile_xclass_matchingpath(common, propdata, backtracks); - return cc + 2; -#endif - - case OP_ANYNL: - if (check_str_ptr) - detect_partial_match(common, backtracks); - read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0); - jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - /* We don't need to handle soft partial matching case. 
*/ - end_list = NULL; - if (common->mode != PCRE2_JIT_PARTIAL_HARD) - add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - else - check_str_end(common, &end_list); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[2] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[0]); - check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); - set_jumps(end_list, LABEL()); - JUMPHERE(jump[1]); - JUMPHERE(jump[2]); - return cc; - - case OP_NOT_HSPACE: - case OP_HSPACE: - if (check_str_ptr) - detect_partial_match(common, backtracks); - - if (type == OP_NOT_HSPACE) - read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR); - else - read_char(common, 0x9, 0x3000, NULL, 0); - - add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO)); return cc; - case OP_NOT_VSPACE: - case OP_VSPACE: - if (check_str_ptr) - detect_partial_match(common, backtracks); - - if (type == OP_NOT_VSPACE) - read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR); + case OP_EODN: + /* Requires rather complex checks. 
*/ + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else if (common->nltype == NLTYPE_FIXED) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + } else - read_char(common, 0xa, 0x2029, NULL, 0); + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0); + jump[2] = JUMP(SLJIT_GREATER); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */); + /* Equal. 
*/ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); - sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + JUMPHERE(jump[1]); + if (common->nltype == NLTYPE_ANYCRLF) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + } + JUMPHERE(jump[2]); + JUMPHERE(jump[3]); + } + JUMPHERE(jump[0]); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); return cc; -#ifdef SUPPORT_UNICODE - case OP_EXTUNI: - if (check_str_ptr) - detect_partial_match(common, backtracks); - - SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); - OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); - -#if PCRE2_CODE_UNIT_WIDTH != 32 - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, - common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); - if (common->invalid_utf) - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); -#else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, - common->invalid_utf ? 
SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); - if (common->invalid_utf) - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); -#endif - - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); - - if (common->mode == PCRE2_JIT_PARTIAL_HARD) - { - jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0); - /* Since we successfully read a char above, partial matching must occure. */ + case OP_EOD: + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + if (common->mode != PCRE2_JIT_COMPLETE) check_partial(common, TRUE); - JUMPHERE(jump[0]); - } return cc; -#endif - - case OP_CHAR: - case OP_CHARI: - length = 1; -#ifdef SUPPORT_UNICODE - if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); -#endif - - if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE) - detect_partial_match(common, backtracks); - if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) + case OP_DOLL: + if (HAS_VIRTUAL_REGISTERS) { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); - - context.length = IN_UCHARS(length); - context.sourcereg = -1; -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.ucharptr = 0; -#endif - return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); } + else + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); -#ifdef SUPPORT_UNICODE - if (common->utf) + if (!common->endonly) + compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); + 
else { - GETCHAR(c, cc); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); } - else -#endif - c = *cc; - - SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc)); - - if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - - oc = char_othercase(common, c); - read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0); - - SLJIT_ASSERT(!is_powerof2(c ^ oc)); + return cc; - if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + case OP_DOLLM: + jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + if (HAS_VIRTUAL_REGISTERS) { - OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc); - SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); } else - { - jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); - JUMPHERE(jump[0]); - } - return cc + length; - - case OP_NOT: - case OP_NOTI: - if (check_str_ptr) - detect_partial_match(common, backtracks); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + check_partial(common, FALSE); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); - length = 1; -#ifdef SUPPORT_UNICODE - if (common->utf) + if (common->nltype == NLTYPE_FIXED && common->newline > 255) { -#if PCRE2_CODE_UNIT_WIDTH == 8 - c = *cc; - if (c < 128 && !common->invalid_utf) - { - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); - else - { - /* Since UTF8 code page 
is fixed, we know that c is in [a-z] or [A-Z] range. */ - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); - } - /* Skip the variable-length character. */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - JUMPHERE(jump[0]); - return cc + 1; - } + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); else -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ { - GETCHARLEN(c, cc, length); + jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); + /* STR_PTR = STR_END - IN_UCHARS(1) */ + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); } + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else -#endif /* SUPPORT_UNICODE */ - c = *cc; + { + peek_char(common, common->nlmax, TMP3, 0, NULL); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; - if (type == OP_NOT || !char_has_othercase(common, cc)) + case OP_CIRC: + if (HAS_VIRTUAL_REGISTERS) { - read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, 
begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } else { - oc = char_othercase(common, c); - read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR); - bit = c ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); - } - else - { - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); - } + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } - return cc + length; - - case OP_CLASS: - case OP_NCLASS: - if (check_str_ptr) - detect_partial_match(common, backtracks); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; - if (type == OP_NCLASS) - read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR); - else - read_char(common, 0, bit, NULL, 0); -#else - if (type == OP_NCLASS) - read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR); - else - read_char(common, 0, 255, NULL, 0); -#endif - - if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) - return cc + 32 / sizeof(PCRE2_UCHAR); + return cc; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 - jump[0] = NULL; - if (common->utf) + case OP_CIRCM: + /* TMP2 might be used by peek_char_back. 
*/ + if (HAS_VIRTUAL_REGISTERS) { - jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); - if (type == OP_CLASS) - { - add_jump(compiler, backtracks, jump[0]); - jump[0] = NULL; - } + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } -#elif PCRE2_CODE_UNIT_WIDTH != 8 - jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - if (type == OP_CLASS) + else { - add_jump(compiler, backtracks, jump[0]); - jump[0] = NULL; + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } -#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ - - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); - add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - if (jump[0] != NULL) - JUMPHERE(jump[0]); -#endif - return cc + 32 / sizeof(PCRE2_UCHAR); - -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 - case OP_XCLASS: - if (check_str_ptr) - detect_partial_match(common, backtracks); - compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); - return cc + GET(cc, 0) - 1; -#endif - } -SLJIT_UNREACHABLE(); -return cc; -} - -static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks) -{ -/* This function consumes at least one input character. 
*/ -/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ -DEFINE_COMPILER; -PCRE2_SPTR ccbegin = cc; -compare_context context; -int size; + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); -context.length = 0; -do - { - if (cc >= ccend) - break; + if (!common->alt_circumflex) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - if (*cc == OP_CHAR) + if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - size = 1; -#ifdef SUPPORT_UNICODE - if (common->utf && HAS_EXTRALEN(cc[1])) - size += GET_EXTRALEN(cc[1]); -#endif + OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } - else if (*cc == OP_CHARI) + else { - size = 1; -#ifdef SUPPORT_UNICODE - if (common->utf) - { - if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) - size = 0; - else if (HAS_EXTRALEN(cc[1])) - size += GET_EXTRALEN(cc[1]); - } - else -#endif - if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) - size = 0; + peek_char_back(common, common->nlmax, backtracks); + check_newlinechar(common, common->nltype, backtracks, FALSE); } - else - size = 0; - - cc += 1 + size; - context.length += IN_UCHARS(size); - } -while (size > 0 && context.length <= 128); - -cc = ccbegin; -if (context.length > 0) - { - /* We have a fixed-length byte sequence. 
*/ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); - - context.sourcereg = -1; -#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.ucharptr = 0; -#endif - do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); + JUMPHERE(jump[0]); return cc; } - -/* A non-fixed length character will be checked if length == 0. */ -return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); +SLJIT_UNREACHABLE(); +return cc; } /* Forward definitions. */ @@ -9470,10 +8022,16 @@ struct sljit_jump *nopartial; #if defined SUPPORT_UNICODE struct sljit_label *loop; struct sljit_label *caseless_loop; +struct sljit_jump *turkish_ascii_i = NULL; +struct sljit_jump *turkish_non_ascii_i = NULL; jump_list *no_match = NULL; int source_reg = COUNT_MATCH; int source_end_reg = ARGUMENTS; int char1_reg = STACK_LIMIT; +PCRE2_UCHAR refi_flag = 0; + +if (*cc == OP_REFI || *cc == OP_DNREFI) + refi_flag = cc[PRIV(OP_lengths)[*cc] - 1]; #endif /* SUPPORT_UNICODE */ if (ref) @@ -9488,9 +8046,10 @@ else OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); #if defined SUPPORT_UNICODE -if (common->utf && *cc == OP_REFI) +if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI)) { - SLJIT_ASSERT(common->iref_ptr != 0); + /* Update ref_update_local_size() when this changes. 
*/ + SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw)); if (ref) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); @@ -9500,9 +8059,9 @@ if (common->utf && *cc == OP_REFI) if (withchecks && emptyfail) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0); OP1(SLJIT_MOV, source_reg, 0, TMP1, 0); OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0); @@ -9526,6 +8085,16 @@ if (common->utf && *cc == OP_REFI) CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) == + REFI_FLAG_TURKISH_CASING) + { + OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20); + turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69); + + OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1); + turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131); + } + OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); @@ -9545,6 +8114,9 @@ if (common->utf && *cc == OP_REFI) OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets)); + if (refi_flag & REFI_FLAG_CASELESS_RESTRICT) + add_jump(compiler, &no_match, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128)); + caseless_loop = LABEL(); OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t)); @@ -9552,30 +8124,52 @@ if (common->utf && *cc == OP_REFI) 
JUMPTO(SLJIT_EQUAL, loop); JUMPTO(SLJIT_LESS, caseless_loop); + if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) == + REFI_FLAG_TURKISH_CASING) + { + add_jump(compiler, &no_match, JUMP(SLJIT_JUMP)); + JUMPHERE(turkish_ascii_i); + + OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5); + OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130); + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + + add_jump(compiler, &no_match, JUMP(SLJIT_JUMP)); + JUMPHERE(turkish_non_ascii_i); + + OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1); + OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5); + OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49); + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + } + set_jumps(no_match, LABEL()); if (common->mode == PCRE2_JIT_COMPLETE) JUMPHERE(partial); - OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); - OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); if (common->mode != PCRE2_JIT_COMPLETE) { JUMPHERE(partial); - OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); - OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); + 
OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); check_partial(common, FALSE); add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); } JUMPHERE(jump); - OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); - OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); return; } else @@ -9594,7 +8188,7 @@ else if (common->mode == PCRE2_JIT_COMPLETE) add_jump(compiler, backtracks, partial); - add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); if (common->mode != PCRE2_JIT_COMPLETE) @@ -9606,7 +8200,7 @@ else OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); - add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? 
&common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); JUMPHERE(partial); check_partial(common, FALSE); @@ -9630,6 +8224,7 @@ DEFINE_COMPILER; BOOL ref = (*cc == OP_REF || *cc == OP_REFI); backtrack_common *backtrack; PCRE2_UCHAR type; +int local_start = LOCAL2; int offset = 0; struct sljit_label *label; struct sljit_jump *zerolength; @@ -9644,9 +8239,21 @@ if (ref) offset = GET2(cc, 1) << 1; else cc += IMM2_SIZE; + +if (*ccbegin == OP_REFI || *ccbegin == OP_DNREFI) + { + cc += 1; +#ifdef SUPPORT_UNICODE + if (common->utf || common->ucp) + local_start = LOCAL3; +#endif + } + type = cc[1 + IMM2_SIZE]; SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); +/* Update ref_update_local_size() when this changes. */ +SLJIT_ASSERT(local_start + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size); minimize = (type & 0x1) != 0; switch(type) { @@ -9698,7 +8305,7 @@ if (!minimize) { compile_dnref_search(common, ccbegin, NULL); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); } /* Restore if not zero length. 
*/ @@ -9721,24 +8328,24 @@ if (!minimize) { compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); } } if (min > 1 || max > 1) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, SLJIT_IMM, 0); label = LABEL(); if (!ref) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw)); compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE); if (min > 1 || max > 1) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local_start); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, TMP1, 0); if (min > 1) CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label); if (max > 1) @@ -10006,12 +8613,13 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_pt SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); /* Needed to save important temporary registers. */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); +SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0); /* SLJIT_R0 = arguments */ OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit)); -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); free_stack(common, callout_arg_size); /* Check return value. 
*/ @@ -10179,6 +8787,7 @@ jump_list **found; /* Saving previous accept variables. */ BOOL save_local_quit_available = common->local_quit_available; BOOL save_in_positive_assertion = common->in_positive_assertion; +sljit_s32 save_restore_end_ptr = common->restore_end_ptr; then_trap_backtrack *save_then_trap = common->then_trap; struct sljit_label *save_quit_label = common->quit_label; struct sljit_label *save_accept_label = common->accept_label; @@ -10286,6 +8895,7 @@ if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)) { /* Control verbs cannot escape from these asserts. */ local_quit_available = TRUE; + common->restore_end_ptr = 0; common->local_quit_available = TRUE; common->quit_label = NULL; common->quit = NULL; @@ -10321,6 +8931,7 @@ while (1) common->quit = save_quit; } common->in_positive_assertion = save_in_positive_assertion; + common->restore_end_ptr = save_restore_end_ptr; common->then_trap = save_then_trap; common->accept_label = save_accept_label; common->positive_assertion_quit = save_positive_assertion_quit; @@ -10418,6 +9029,7 @@ while (1) common->quit = save_quit; } common->in_positive_assertion = save_in_positive_assertion; + common->restore_end_ptr = save_restore_end_ptr; common->then_trap = save_then_trap; common->accept_label = save_accept_label; common->positive_assertion_quit = save_positive_assertion_quit; @@ -10557,7 +9169,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { JUMPTO(SLJIT_JUMP, backtrack->matchingpath); JUMPHERE(brajump); - if (framesize >= 0) + SLJIT_ASSERT(framesize != 0); + if (framesize > 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -10622,7 +9235,9 @@ if (local_quit_available) common->quit_label = save_quit_label; common->quit = save_quit; } + common->in_positive_assertion = save_in_positive_assertion; +common->restore_end_ptr = save_restore_end_ptr; common->then_trap = save_then_trap; 
common->accept_label = save_accept_label; common->positive_assertion_quit = save_positive_assertion_quit; @@ -10813,6 +9428,7 @@ BOOL needs_control_head = FALSE; BOOL has_vreverse = FALSE; struct sljit_jump *jump; struct sljit_jump *skip; +jump_list *jumplist; struct sljit_label *rmax_label = NULL; struct sljit_jump *braminzero = NULL; @@ -10875,7 +9491,8 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; matchingpath += IMM2_SIZE; } -else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE + || opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) { /* Other brackets simply allocate the next entry. */ private_data_ptr = PRIVATE_DATA(ccbegin); @@ -11086,6 +9703,88 @@ else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SC if (*matchingpath == OP_REVERSE) matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); } +else if (opcode == OP_ASSERT_SCS) + { + /* Nested scs blocks will not update this variable. */ + if (common->restore_end_ptr == 0) + common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw); + + if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF)) + { + /* Optimized case for a single capture reference. */ + i = OVECTOR(GET2(matchingpath, 1) << 1); + + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i); + + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + matchingpath += 1 + IMM2_SIZE; + + allocate_stack(common, has_alternatives ? 
3 : 2); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); + jumplist = NULL; + + while (TRUE) + { + if (*matchingpath == OP_CREF) + { + sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1)); + matchingpath += 1 + IMM2_SIZE; + } + else + { + SLJIT_ASSERT(*matchingpath == OP_DNCREF); + + i = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + + while (i-- > 1) + { + sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1)); + add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0)); + slot += common->name_entry_size; + } + + sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1)); + matchingpath += 1 + 2 * IMM2_SIZE; + } + + if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF) + break; + + add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0)); + } + + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), + CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0)); + + set_jumps(jumplist, LABEL()); + + allocate_stack(common, has_alternatives ? 
3 : 2); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0); + + if (has_alternatives) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); + } else if (has_alternatives) { /* Pushing the starting string pointer. */ @@ -11099,7 +9798,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) if (*matchingpath == OP_CREF) { SLJIT_ASSERT(has_alternatives); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); matchingpath += 1 + IMM2_SIZE; } @@ -11121,13 +9820,13 @@ if (opcode == OP_COND || opcode == OP_SCOND) slot += common->name_entry_size; } OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO)); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO)); matchingpath += 1 + 2 * IMM2_SIZE; } else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) { /* Never has other case. 
*/ - BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; + BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL; SLJIT_ASSERT(!has_alternatives); if (*matchingpath == OP_TRUE) @@ -11216,9 +9915,6 @@ switch (opcode) if (PRIVATE_DATA(ccbegin + 1)) OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); break; - case OP_ASSERT_NA: - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - break; case OP_ONCE: match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); break; @@ -11284,7 +9980,7 @@ if (has_alternatives) if (i <= 3) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); else - BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); } if (ket != OP_KETRMAX) BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); @@ -11296,6 +9992,22 @@ if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); } +else switch (opcode) + { + case OP_ASSERT_NA: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_ASSERT_SCS: + OP1(SLJIT_MOV, TMP1, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0); + + /* Nested scs blocks will not update this variable. 
*/ + if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw)) + common->restore_end_ptr = 0; + break; + } if (ket == OP_KETRMAX) { @@ -11359,13 +10071,19 @@ if (bra == OP_BRAMINZERO) /* We need to release the end pointer to perform the backtrack for the zero-length iteration. When framesize is < 0, OP_ONCE will do the release itself. */ - if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) + if (opcode == OP_ONCE) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize; + + SLJIT_ASSERT(framesize != 0); + if (framesize > 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); + } } - else if (ket == OP_KETRMIN && opcode != OP_ONCE) + else if (ket == OP_KETRMIN) free_stack(common, 1); } /* Continue to the normal backtrack. */ @@ -11613,12 +10331,7 @@ while (*cc != OP_KETRPOS) add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); if (!zero) - { - if (framesize < 0) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); - else - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } JUMPTO(SLJIT_JUMP, loop); @@ -11712,11 +10425,11 @@ else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) } else { - SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); + SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS || *opcode == OP_ECLASS); *type = *opcode; + class_len = (*type < OP_XCLASS) ? 
(int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 1); + *opcode = cc[class_len]; cc++; - class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0); - *opcode = cc[class_len - 1]; if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) { @@ -11728,8 +10441,10 @@ else *exact = 1; *opcode -= OP_PLUS - OP_STAR; } + return cc; } - else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) + + if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) { *opcode -= OP_CRPOSSTAR - OP_POSSTAR; *end = cc + class_len; @@ -11739,41 +10454,40 @@ else *exact = 1; *opcode = OP_POSSTAR; } + return cc; } - else + + SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); + *max = GET2(cc, (class_len + IMM2_SIZE)); + *exact = GET2(cc, class_len); + *end = cc + class_len + 2 * IMM2_SIZE; + + if (*max == 0) { - SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); - *max = GET2(cc, (class_len + IMM2_SIZE)); - *exact = GET2(cc, class_len); + SLJIT_ASSERT(*exact > 1); + if (*opcode == OP_CRRANGE) + *opcode = OP_UPTO; + else if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSUPTO; + else + *opcode = OP_MINSTAR; + return cc; + } - if (*max == 0) - { - if (*opcode == OP_CRPOSRANGE) - *opcode = OP_POSSTAR; - else - *opcode -= OP_CRRANGE - OP_STAR; - } + *max -= *exact; + if (*max == 0) + *opcode = OP_EXACT; + else + { + SLJIT_ASSERT(*exact > 0 || *max > 1); + if (*opcode == OP_CRRANGE) + *opcode = OP_UPTO; + else if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSUPTO; + else if (*max == 1) + *opcode = OP_MINQUERY; else - { - *max -= *exact; - if (*max == 0) - *opcode = OP_EXACT; - else if (*max == 1) - { - if (*opcode == OP_CRPOSRANGE) - *opcode = OP_POSQUERY; - else - *opcode -= OP_CRRANGE - OP_QUERY; - } - else - { - if (*opcode == OP_CRPOSRANGE) - *opcode = OP_POSUPTO; - else - *opcode -= OP_CRRANGE - OP_UPTO; - } - } - *end = cc + class_len + 2 * IMM2_SIZE; + *opcode = OP_MINUPTO; } return cc; 
} @@ -11819,16 +10533,17 @@ if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); return cc; } -static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks) { DEFINE_COMPILER; -backtrack_common *backtrack; +backtrack_common *backtrack = NULL; +PCRE2_SPTR begin = cc; PCRE2_UCHAR opcode; PCRE2_UCHAR type; sljit_u32 max = 0, exact; sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1); sljit_s32 early_fail_type; -BOOL charpos_enabled; +BOOL charpos_enabled, use_tmp; PCRE2_UCHAR charpos_char; unsigned int charpos_othercasebit; PCRE2_SPTR end; @@ -11841,11 +10556,6 @@ int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); int tmp_base, tmp_offset; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -BOOL use_tmp; -#endif - -PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); early_fail_type = (early_fail_ptr & 0x7); early_fail_ptr >>= 3; @@ -11861,7 +10571,7 @@ SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr)); if (early_fail_type == type_fail) - add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); + add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); @@ -11873,39 +10583,47 @@ if (type != OP_EXTUNI) else { tmp_base = SLJIT_MEM1(SLJIT_SP); - tmp_offset = POSSESSIVE0; + tmp_offset = LOCAL2; } -/* Handle fixed part first. 
*/ -if (exact > 1) +if (opcode == OP_EXACT) { - SLJIT_ASSERT(early_fail_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2); if (common->mode == PCRE2_JIT_COMPLETE -#ifdef SUPPORT_UNICODE +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 && !common->utf #endif && type != OP_ANYNL && type != OP_EXTUNI) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); - add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); + OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0); + add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact))); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 + if (type == OP_ALLANY && !common->invalid_utf) +#else + if (type == OP_ALLANY) +#endif + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); + else + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } } else { + SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw)); OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + compile_char1_matchingpath(common, type, cc, prev_backtracks, TRUE); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); } } -else if (exact == 1) - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); if (early_fail_type == type_fail_range) 
{ @@ -11914,38 +10632,57 @@ if (early_fail_type == type_fail_range) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); - add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); + add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0); } +if (opcode < OP_EXACT) + PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL); + switch(opcode) { case OP_STAR: case OP_UPTO: - SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR); + SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR)); + max += exact; - if (type == OP_ANYNL || type == OP_EXTUNI) + if (type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); SLJIT_ASSERT(early_fail_ptr == 0); - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + if (exact == 1) + { + SLJIT_ASSERT(opcode == OP_STAR); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + { + /* If OP_EXTUNI is present, it has a separate EXACT opcode. 
*/ + SLJIT_ASSERT(exact == 0); + + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + } if (opcode == OP_UPTO) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max); + { + SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max); + } label = LABEL(); - compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); if (opcode == OP_UPTO) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); jump = JUMP(SLJIT_ZERO); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0); } /* We cannot use TMP3 because of allocate_stack. */ @@ -11965,6 +10702,9 @@ switch(opcode) { if (opcode == OP_STAR) { + if (exact == 1) + detect_partial_match(common, prev_backtracks); + if (private_data_ptr == 0) allocate_stack(common, 2); @@ -11985,6 +10725,9 @@ switch(opcode) else #endif { + /* If OP_ALLANY is present, it has a separate EXACT opcode. 
*/ + SLJIT_ASSERT(exact == 0); + if (private_data_ptr == 0) allocate_stack(common, 2); @@ -12016,6 +10759,7 @@ switch(opcode) charpos_char = 0; charpos_othercasebit = 0; + SLJIT_ASSERT(tmp_base == TMP3); if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) { #ifdef SUPPORT_UNICODE @@ -12045,176 +10789,320 @@ switch(opcode) if (charpos_othercasebit != 0) charpos_char |= charpos_othercasebit; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; + BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE; + BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char; + BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit; + + if (private_data_ptr == 0) + allocate_stack(common, 2); + + use_tmp = (opcode == OP_STAR); + + if (use_tmp) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, base, offset0, TMP3, 0); + } + else + { + OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1)); + } + + /* Search the first instance of charpos_char. 
*/ + if (exact > 0) + detect_partial_match(common, &no_match); + else + jump = JUMP(SLJIT_JUMP); + + label = LABEL(); + + if (opcode == OP_UPTO) + { + if (exact == max) + OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + else + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } + } + + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + + if (exact == 0) + JUMPHERE(jump); + + detect_partial_match(common, &no_match); + + if (opcode == OP_UPTO && exact > 0) + { + if (exact == max) + CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label); + else + CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label); + } + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (use_tmp) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0); + SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3); + } + else + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0); + SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH); + } + JUMPTO(SLJIT_JUMP, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + if (use_tmp) + OP1(SLJIT_MOV, base, offset1, TMP3, 0); + else + { + OP1(SLJIT_MOV, TMP1, 0, base, offset1); + OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0); + } + + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0)); + + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + break; } } - if (charpos_enabled) - { - if (opcode == OP_UPTO) 
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); + if (private_data_ptr == 0) + allocate_stack(common, 2); - /* Search the first instance of charpos_char. */ - jump = JUMP(SLJIT_JUMP); - label = LABEL(); - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO)); - } - compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); - if (early_fail_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); - JUMPHERE(jump); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + use_tmp = (opcode == OP_STAR); - detect_partial_match(common, &backtrack->own_backtracks); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (charpos_othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + if (common->utf) + { + if (!use_tmp) + OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0); - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0); + } +#endif - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); - } + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max); - /* Search the last instance of charpos_char. 
*/ + if (opcode == OP_UPTO && exact > 0) + { label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, FALSE); - if (early_fail_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); detect_partial_match(common, &no_match); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (charpos_othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0); +#endif - if (opcode == OP_STAR) + if (exact == max) { - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, label); + OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); } else { - jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPHERE(jump); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label); } - set_jumps(no_match, LABEL()); - OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1)); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); } else { - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR); - SLJIT_ASSERT(!use_tmp || tmp_base == TMP3); - - if (common->utf) - OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 
0 : offset0, STR_PTR, 0); -#endif - if (opcode == OP_UPTO) - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); detect_partial_match(common, &no_match); label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0); + OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0); #endif if (opcode == OP_UPTO) { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); } detect_partial_match_to(common, label); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } - set_jumps(no_char1_match, LABEL()); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) + if (common->utf) + { + set_jumps(no_char1_match, LABEL()); + set_jumps(no_match, LABEL()); + if (use_tmp) { - set_jumps(no_match, LABEL()); - if (use_tmp) - { - OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); - OP1(SLJIT_MOV, base, offset0, TMP3, 0); - } - else - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + OP1(SLJIT_MOV, base, offset0, TMP3, 0); } else -#endif { - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); } + } + else +#endif + { + if (opcode != OP_UPTO || exact == 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_char1_match, LABEL()); - if (early_fail_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + + if (opcode == OP_UPTO) + { + 
if (exact > 0) + { + if (max == exact) + jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact); + else + jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact); + + add_jump(compiler, &backtrack->own_backtracks, jump); + } } + else if (exact == 1) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0)); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; - case OP_MINSTAR: + case OP_QUERY: + SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_MINSTAR: + case OP_MINQUERY: + SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0)); + if (private_data_ptr == 0) + allocate_stack(common, 1); + + if (exact >= 1) + { + if (exact >= 2) + { + /* Extuni has a separate exact opcode. 
*/ + SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact); + } + + if (opcode == OP_MINQUERY) + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1); + + label = LABEL(); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label; + + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + + if (exact >= 2) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + + if (opcode == OP_MINQUERY) + OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0); + else + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else + { + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + } + if (early_fail_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; case OP_MINUPTO: - SLJIT_ASSERT(early_fail_ptr == 0); + SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); - BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); - break; - case OP_QUERY: - case OP_MINQUERY: - SLJIT_ASSERT(early_fail_ptr == 0); - if (private_data_ptr == 0) - allocate_stack(common, 1); + if (exact == 0) + { + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } + + if (exact >= 2) + { + /* Extuni has a separate exact opcode. 
*/ + SLJIT_ASSERT(tmp_base == TMP3); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact); + } + + label = LABEL(); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label; + + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + + if (exact >= 2) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (opcode == OP_QUERY) - compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); - BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; case OP_EXACT: + SLJIT_ASSERT(backtrack == NULL); break; case OP_POSSTAR: + SLJIT_ASSERT(backtrack == NULL); #if defined SUPPORT_UNICODE if (type == OP_ALLANY && !common->invalid_utf) #else if (type == OP_ALLANY) #endif { + if (exact == 1) + detect_partial_match(common, prev_backtracks); + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); process_partial_match(common); if (early_fail_ptr != 0) @@ -12223,98 +11111,150 @@ switch(opcode) } #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (type == OP_EXTUNI || common->utf) + if (common->utf) { - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw)); + + if (tmp_base != TMP3) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0); + tmp_base = COUNT_MATCH; + } + + OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? 
SLJIT_IMM : STR_PTR, 0); detect_partial_match(common, &no_match); label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_match, FALSE); - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0); detect_partial_match_to(common, label); set_jumps(no_match, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0); + + if (tmp_base != TMP3) + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); + + if (exact == 1) + add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0)); + if (early_fail_ptr != 0) - { - if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0); - else - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); - } + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; } #endif + if (exact == 1) + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match(common, &no_match); label = LABEL(); + /* Extuni never fails, so no_char1_match is not used in that case. + Anynl optionally reads an extra character on success. 
*/ compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); detect_partial_match_to(common, label); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (type != OP_EXTUNI) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_char1_match, LABEL()); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (type != OP_EXTUNI) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + + if (exact == 1) + add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0)); + if (early_fail_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; - case OP_POSUPTO: - SLJIT_ASSERT(early_fail_ptr == 0); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); - - detect_partial_match(common, &no_match); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, FALSE); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); - detect_partial_match_to(common, label); + case OP_POSUPTO: + SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0); + max += exact; - set_jumps(no_match, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); - break; - } +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (type == OP_EXTUNI || common->utf) +#else + if (type == OP_EXTUNI) #endif - - if (type == OP_ALLANY) { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw)); - if (common->mode == PCRE2_JIT_COMPLETE) + /* Count match is not modified by compile_char1_matchingpath. 
*/ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0); + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 0 : max); + + label = LABEL(); + /* Extuni only modifies TMP3 on successful match. */ + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + + if (exact == max) { - OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); - SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_JUMP, label); } else { - jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); - process_partial_match(common); - JUMPHERE(jump); + OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + } + + set_jumps(no_match, LABEL()); + + if (exact > 0) + { + if (exact == max) + OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact); + else + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact); } + + OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2); + + if (exact > 0) + add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER)); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); break; } - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + SLJIT_ASSERT(tmp_base == TMP3); + + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 
0 : max); detect_partial_match(common, &no_match); label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + + if (exact == max) + OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + else + { + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } detect_partial_match_to(common, label); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_char1_match, LABEL()); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_match, LABEL()); + + if (exact > 0) + { + if (exact == max) + jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact); + else + jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact); + + add_jump(compiler, prev_backtracks, jump); + } break; case OP_POSQUERY: - SLJIT_ASSERT(early_fail_ptr == 0); + SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0); + SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw)); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); compile_char1_matchingpath(common, type, cc, &no_match, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); @@ -12477,6 +11417,7 @@ DEFINE_COMPILER; backtrack_common *backtrack; BOOL has_then_trap = FALSE; then_trap_backtrack *save_then_trap = NULL; +size_t op_len; SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS)); @@ -12612,21 +11553,23 @@ while (cc < ccend) case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO: - cc = compile_iterator_matchingpath(common, cc, parent); + cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks); break; case OP_CLASS: case OP_NCLASS: if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE) - cc = compile_iterator_matchingpath(common, cc, parent); + cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); else cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 case OP_XCLASS: - if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) - cc = compile_iterator_matchingpath(common, cc, parent); + case OP_ECLASS: + op_len = GET(cc, 1); + if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); else cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; @@ -12634,24 +11577,26 @@ while (cc < ccend) case OP_REF: case OP_REFI: - if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) + op_len = PRIV(OP_lengths)[*cc]; + if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { compile_ref_matchingpath(common, cc, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); - cc += 1 + IMM2_SIZE; + cc += op_len; } break; case OP_DNREF: case OP_DNREFI: - if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) + op_len = PRIV(OP_lengths)[*cc]; + if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); - cc += 1 + 2 * IMM2_SIZE; + cc += op_len; } break; @@ -12692,6 +11637,7 @@ while (cc < ccend) case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -12803,6 +11749,28 @@ SLJIT_ASSERT(cc == ccend); #define CURRENT_AS(type) ((type *)current) +static void compile_newline_move_back(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0); +/* All newlines are single byte, or their last byte +is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. 
*/ +OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR << 8 | CHAR_NL); +OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +JUMPHERE(jump); +} + static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; @@ -12825,52 +11793,104 @@ switch(opcode) { case OP_STAR: case OP_UPTO: - if (type == OP_ANYNL || type == OP_EXTUNI) + if (type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); - set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); } else { - if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled) + if (CURRENT_AS(char_iterator_backtrack)->charpos.charpos_enabled) { OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, TMP2, 0, base, offset1); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); label = LABEL(); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit); - CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath); 
+ if (type == OP_ANYNL) + compile_newline_move_back(common); move_back(common, NULL, TRUE); - CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath); + /* The range beginning must match, no need to compare. */ + JUMPTO(SLJIT_JUMP, label); + + set_jumps(current->own_backtracks, LABEL()); + current->own_backtracks = NULL; } else { OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); - move_back(common, NULL, TRUE); + + if (opcode == OP_STAR && exact == 1) + { + if (type == OP_ANYNL) + { + OP1(SLJIT_MOV, TMP2, 0, base, offset1); + compile_newline_move_back(common); + } + + move_back(common, NULL, TRUE); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); + } + else + { + if (type == OP_ANYNL) + { + OP1(SLJIT_MOV, TMP2, 0, base, offset1); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + compile_newline_move_back(common); + } + else + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); + + move_back(common, NULL, TRUE); + } + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(current->own_backtracks, LABEL()); } + JUMPHERE(jump); if (private_data_ptr == 0) free_stack(common, 2); } break; + case OP_QUERY: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + jump = JUMP(SLJIT_JUMP); + set_jumps(current->own_backtracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + 
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 1); + break; + case OP_MINSTAR: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (exact == 0) + { + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else if (exact > 1) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); - set_jumps(jumplist, LABEL()); + set_jumps(exact > 0 ? current->own_backtracks : jumplist, LABEL()); if (private_data_ptr == 0) free_stack(common, 1); break; @@ -12879,56 +11899,60 @@ switch(opcode) OP1(SLJIT_MOV, TMP1, 0, base, offset1); OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); - OP1(SLJIT_MOV, base, offset1, TMP1, 0); - compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + if (exact == 0) + { + add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); - set_jumps(jumplist, LABEL()); - if (private_data_ptr == 0) - free_stack(common, 2); - break; + OP1(SLJIT_MOV, base, offset1, TMP1, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(jumplist, LABEL()); + } + else + { + if (exact > 1) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV, base, offset1, TMP1, 0); + JUMPTO(SLJIT_NOT_ZERO, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(current->own_backtracks, LABEL()); + } - case OP_QUERY: - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - 
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); - jump = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); - JUMPHERE(jump); if (private_data_ptr == 0) - free_stack(common, 1); + free_stack(common, 2); break; case OP_MINQUERY: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); - JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); - set_jumps(jumplist, LABEL()); - JUMPHERE(jump); + + if (exact >= 1) + { + if (exact >= 2) + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(current->own_backtracks, LABEL()); + } + else + { + jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); + JUMPHERE(jump); + } + if (private_data_ptr == 0) free_stack(common, 1); break; - case OP_EXACT: - case OP_POSSTAR: - case OP_POSQUERY: - case OP_POSUPTO: - break; - default: SLJIT_UNREACHABLE(); break; } - -set_jumps(current->own_backtracks, LABEL()); } static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12938,7 +11962,7 @@ PCRE2_SPTR cc = current->cc; BOOL ref = (*cc == OP_REF || *cc == OP_REFI); PCRE2_UCHAR type; -type = cc[ref ? 
1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; +type = cc[PRIV(OP_lengths)[*cc]]; if ((type & 0x1) == 0) { @@ -13057,7 +12081,7 @@ PCRE2_SPTR ccbegin; PCRE2_SPTR ccprev; PCRE2_UCHAR bra = OP_BRA; PCRE2_UCHAR ket; -assert_backtrack *assert; +const assert_backtrack *assert; BOOL has_alternatives; BOOL needs_control_head = FALSE; BOOL has_vreverse; @@ -13093,7 +12117,7 @@ ccbegin = cc; cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; + has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL; if (opcode == OP_CBRA || opcode == OP_SCBRA) offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) @@ -13196,14 +12220,27 @@ if (offset != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); } } +else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS)) + { + OP1(SLJIT_MOV, TMP1, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0); + + /* Nested scs blocks will not update this variable. 
*/ + if (common->restore_end_ptr == 0) + common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw); + } if (SLJIT_UNLIKELY(opcode == OP_ONCE)) { - if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + int framesize = CURRENT_AS(bracket_backtrack)->u.framesize; + + SLJIT_ASSERT(framesize != 0); + if (framesize > 0) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); } once = JUMP(SLJIT_JUMP); } @@ -13228,8 +12265,8 @@ else if (has_alternatives) { sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); - SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr); - sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL()); + SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL); + sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL()); sljit_emit_op0(compiler, SLJIT_ENDBR); } else @@ -13247,7 +12284,8 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) { SLJIT_ASSERT(has_alternatives); assert = CURRENT_AS(bracket_backtrack)->u.assert; - if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) + SLJIT_ASSERT(assert->framesize != 0); + if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -13258,11 +12296,11 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) cond = JUMP(SLJIT_JUMP); set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); } - else if 
(CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) + else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL) { SLJIT_ASSERT(has_alternatives); cond = JUMP(SLJIT_JUMP); - set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); + set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL()); } else SLJIT_ASSERT(!has_alternatives); @@ -13283,26 +12321,33 @@ if (has_alternatives) cc += GET(cc, 1); has_vreverse = FALSE; - if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA) - { - SLJIT_ASSERT(private_data_ptr != 0); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - has_vreverse = (*ccprev == OP_VREVERSE); - if (*ccprev == OP_REVERSE || has_vreverse) - ccprev = compile_reverse_matchingpath(common, ccprev, current); - } - else if (opcode != OP_COND && opcode != OP_SCOND) + switch (opcode) { - if (opcode != OP_ONCE) - { + case OP_ASSERTBACK: + case OP_ASSERTBACK_NA: + SLJIT_ASSERT(private_data_ptr != 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + has_vreverse = (*ccprev == OP_VREVERSE); + if (*ccprev == OP_REVERSE || has_vreverse) + ccprev = compile_reverse_matchingpath(common, ccprev, current); + break; + case OP_ASSERT_SCS: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); + break; + case OP_ONCE: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); + break; + case OP_COND: + case OP_SCOND: + break; + default: if (private_data_ptr != 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); - } - else - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 
1 : 0)); + break; } compile_matchingpath(common, ccprev, cc, current); @@ -13423,14 +12468,18 @@ if (has_alternatives) if (cond != NULL) { SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); - assert = CURRENT_AS(bracket_backtrack)->u.assert; - if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) + if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); - add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + assert = CURRENT_AS(bracket_backtrack)->u.assert; + SLJIT_ASSERT(assert->framesize != 0); + if (assert->framesize > 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + } } JUMPHERE(cond); } @@ -13472,6 +12521,21 @@ else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SC OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); } +else if (opcode == OP_ASSERT_SCS) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 
0); + free_stack(common, has_alternatives ? 3 : 2); + + set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL()); + + /* Nested scs blocks will not update this variable. */ + if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw)) + common->restore_end_ptr = 0; + } else if (opcode == OP_ONCE) { cc = ccbegin + GET(ccbegin, 1); @@ -13652,6 +12716,9 @@ if (opcode == OP_THEN || opcode == OP_THEN_ARG) } } +if (common->restore_end_ptr != 0 && opcode != OP_SKIP_ARG) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr); + if (common->local_quit_available) { /* Abort match with a fail. */ @@ -13669,8 +12736,18 @@ if (opcode == OP_SKIP_ARG) OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark)); + if (common->restore_end_ptr == 0) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); + add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); + return; + } + + jump = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); - add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr); + add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); + JUMPHERE(jump); return; } @@ -13702,6 +12779,7 @@ static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *com { DEFINE_COMPILER; struct sljit_jump *jump; +int framesize; int size; if (CURRENT_AS(then_trap_backtrack)->then_trap) @@ -13718,11 +12796,15 @@ free_stack(common, size); jump = JUMP(SLJIT_JUMP); set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL()); + +framesize = CURRENT_AS(then_trap_backtrack)->framesize; +SLJIT_ASSERT(framesize != 0); + /* STACK_TOP is set by THEN. 
*/ -if (CURRENT_AS(then_trap_backtrack)->framesize >= 0) +if (framesize > 0) { add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); } OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 3); @@ -13813,10 +12895,13 @@ while (current) case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO: + /* Since classes has no backtracking path, this + backtrackingpath was pushed by an iterator. */ case OP_CLASS: case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: + case OP_ECLASS: #endif compile_iterator_backtrackingpath(common, current); break; @@ -13841,6 +12926,7 @@ while (current) case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -13892,8 +12978,12 @@ while (current) case OP_COMMIT: case OP_COMMIT_ARG: + if (common->restore_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr); + if (!common->local_quit_available) OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + if (common->quit_label == NULL) add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); else @@ -14154,8 +13244,7 @@ int private_data_size; PCRE2_SPTR ccend; executable_functions *functions; void *executable_func; -sljit_uw executable_size; -sljit_uw total_length; +sljit_uw executable_size, private_data_length, total_length; struct sljit_label *mainloop_label = NULL; struct sljit_label *continue_match_label; struct sljit_label *empty_match_found_label = NULL; @@ -14182,7 +13271,7 @@ memset(&rootbacktrack, 0, sizeof(backtrack_common)); memset(common, 0, sizeof(compiler_common)); common->re = re; common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); -rootbacktrack.cc = 
common->name_table + re->name_count * re->name_entry_size; +rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start); #ifdef SUPPORT_UNICODE common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0; @@ -14230,7 +13319,7 @@ common->name_entry_size = re->name_entry_size; common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0; common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0; #ifdef SUPPORT_UNICODE -/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ +/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */ common->utf = (re->overall_options & PCRE2_UTF) != 0; common->ucp = (re->overall_options & PCRE2_UCP) != 0; if (common->utf) @@ -14262,10 +13351,26 @@ else ccend = bracketend(common->start); /* Calculate the local space size on the stack. */ -common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); -common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data); -if (!common->optimized_cbracket) +common->ovector_start = LOCAL0; +/* Allocate space for temporary data structures. */ +private_data_length = ccend - common->start; +/* The chance of overflow is very low, but might happen on 32 bit. */ +if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32)) return PCRE2_ERROR_NOMEMORY; + +private_data_length *= sizeof(sljit_s32); +/* Align to 32 bit. 
*/ +total_length = ((re->top_bracket + 1) + (sljit_uw)(sizeof(sljit_s32) - 1)) & ~(sljit_uw)(sizeof(sljit_s32) - 1); +if (~(sljit_uw)0 - private_data_length < total_length) + return PCRE2_ERROR_NOMEMORY; + +total_length += private_data_length; +common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data); +if (!common->private_data_ptrs) + return PCRE2_ERROR_NOMEMORY; + +memset(common->private_data_ptrs, 0, private_data_length); +common->optimized_cbracket = ((sljit_u8 *)common->private_data_ptrs) + private_data_length; #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 memset(common->optimized_cbracket, 0, re->top_bracket + 1); #else @@ -14279,16 +13384,19 @@ common->ovector_start += sizeof(sljit_sw); #endif if (!check_opcode_types(common, common->start, ccend)) { - SLJIT_FREE(common->optimized_cbracket, allocator_data); - return PCRE2_ERROR_NOMEMORY; + SLJIT_FREE(common->private_data_ptrs, allocator_data); + return PCRE2_ERROR_JIT_UNSUPPORTED; } /* Checking flags and updating ovector_start. 
*/ -if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) +if (mode == PCRE2_JIT_COMPLETE && + (re->flags & PCRE2_LASTSET) != 0 && + (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { common->req_char_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } + if (mode != PCRE2_JIT_COMPLETE) { common->start_used_ptr = common->ovector_start; @@ -14299,19 +13407,23 @@ if (mode != PCRE2_JIT_COMPLETE) common->ovector_start += sizeof(sljit_sw); } } + if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0) { common->match_end_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } + #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD common->control_head_ptr = 1; #endif + if (common->control_head_ptr != 0) { common->control_head_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } + if (common->has_set_som) { /* Saving the real start pointer is necessary. */ @@ -14332,19 +13444,11 @@ if (common->capture_last_ptr != 0) SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); - -total_length = ccend - common->start; -common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 
1 : 0)), allocator_data); -if (!common->private_data_ptrs) - { - SLJIT_FREE(common->optimized_cbracket, allocator_data); - return PCRE2_ERROR_NOMEMORY; - } -memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); - private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); -if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) +if ((re->overall_options & PCRE2_ANCHORED) == 0 && + (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 && + !common->has_skip_in_assert_back) detect_early_fail(common, common->start, &private_data_size, 0, 0); set_private_data_ptrs(common, &private_data_size, ccend); @@ -14354,13 +13458,18 @@ SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); if (private_data_size > 65536) { SLJIT_FREE(common->private_data_ptrs, allocator_data); - SLJIT_FREE(common->optimized_cbracket, allocator_data); - return PCRE2_ERROR_NOMEMORY; + return PCRE2_ERROR_JIT_UNSUPPORTED; } if (common->has_then) { - common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); + total_length = ccend - common->start; + common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data); + if (!common->then_offsets) + { + SLJIT_FREE(common->private_data_ptrs, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } memset(common->then_offsets, 0, total_length); set_then_offsets(common, common->start, NULL); } @@ -14368,15 +13477,16 @@ if (common->has_then) compiler = sljit_create_compiler(allocator_data); if (!compiler) { - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); return PCRE2_ERROR_NOMEMORY; } common->compiler = compiler; /* Main pcre2_jit_exec entry. 
*/ SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0); -sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size); +sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); @@ -14410,7 +13520,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0) mainloop_label = mainloop_entry(common); continue_match_label = LABEL(); /* Forward search if possible. */ - if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common)) ; @@ -14425,7 +13535,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0) else continue_match_label = LABEL(); -if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) +if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && + (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength)); @@ -14460,8 +13571,9 @@ compile_matchingpath(common, common->start, ccend, &rootbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -14516,8 +13628,9 @@ compile_backtrackingpath(common, rootbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, 
allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -14598,6 +13711,8 @@ common->early_fail_end_ptr = 0; common->currententry = common->entries; common->local_quit_available = TRUE; quit_label = common->quit_label; +SLJIT_ASSERT(common->restore_end_ptr == 0); + if (common->currententry != NULL) { /* A free bit for each private data. */ @@ -14627,24 +13742,28 @@ if (common->currententry != NULL) SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL); sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); + if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } } + common->local_quit_available = FALSE; common->quit_label = quit_label; +SLJIT_ASSERT(common->restore_end_ptr == 0); -/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ +/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */ /* This is a (really) rare case. */ set_jumps(common->stackalloc, LABEL()); /* RETURN_ADDR is not a saved register. 
*/ -sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw)); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0); SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); @@ -14655,8 +13774,8 @@ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FU jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); /* Allocation failed. */ @@ -14777,8 +13896,9 @@ if (common->getucdtype != NULL) } #endif /* SUPPORT_UNICODE */ -SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); +if (common->has_then) + SLJIT_FREE(common->then_offsets, allocator_data); executable_func = sljit_generate_code(compiler, 0, NULL); executable_size = sljit_get_generated_code_size(compiler); @@ -14848,9 +13968,36 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options) { pcre2_real_code *re = (pcre2_real_code *)code; #ifdef SUPPORT_JIT +void *exec_memory; executable_functions *functions; static int executable_allocator_is_working = -1; + +if (executable_allocator_is_working == -1) + { + /* Checks whether the executable allocator is working. 
This check + might run multiple times in multi-threaded environments, but the + result should not be affected by it. */ + exec_memory = SLJIT_MALLOC_EXEC(32, NULL); + if (exec_memory != NULL) + { + SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL); + executable_allocator_is_working = 1; + } + else executable_allocator_is_working = 0; + } +#endif + +if (options & PCRE2_JIT_TEST_ALLOC) + { + if (options != PCRE2_JIT_TEST_ALLOC) + return PCRE2_ERROR_JIT_BADOPTION; + +#ifdef SUPPORT_JIT + return executable_allocator_is_working ? 0 : PCRE2_ERROR_NOMEMORY; +#else + return PCRE2_ERROR_JIT_UNSUPPORTED; #endif + } if (code == NULL) return PCRE2_ERROR_NULL; @@ -14912,20 +14059,6 @@ return PCRE2_ERROR_JIT_BADOPTION; if ((re->flags & PCRE2_NOJIT) != 0) return 0; -if (executable_allocator_is_working == -1) - { - /* Checks whether the executable allocator is working. This check - might run multiple times in multi-threaded environments, but the - result should not be affected by it. 
*/ - void *ptr = SLJIT_MALLOC_EXEC(32, NULL); - if (ptr != NULL) - { - SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); - executable_allocator_is_working = 1; - } - else executable_allocator_is_working = 0; - } - if (!executable_allocator_is_working) return PCRE2_ERROR_NOMEMORY; diff --git a/src/pcre2_jit_match.c b/src/pcre2_jit_match.c index ae5903e..8867f76 100644 --- a/src/pcre2_jit_match.c +++ b/src/pcre2_jit_match.c @@ -83,7 +83,7 @@ Arguments: Returns: > 0 => success; value is the number of ovector pairs filled = 0 => success, but ovector is not big enough - -1 => failed to match (PCRE_ERROR_NOMATCH) + -1 => failed to match (PCRE2_ERROR_NOMATCH) < -1 => some kind of unexpected problem */ diff --git a/src/pcre2_jit_neon_inc.h b/src/pcre2_jit_neon_inc.h index 4a718b6..9caa1f2 100644 --- a/src/pcre2_jit_neon_inc.h +++ b/src/pcre2_jit_neon_inc.h @@ -198,14 +198,14 @@ vect_t data = VLD1Q(*str_ptr); #if PCRE2_CODE_UNIT_WIDTH != 8 data = VANDQ(data, char_mask); #endif - + #if defined(FFCS) vect_t eq = VCEQQ(data, vc1); #elif defined(FFCS_2) vect_t eq1 = VCEQQ(data, vc1); vect_t eq2 = VCEQQ(data, vc2); -vect_t eq = VORRQ(eq1, eq2); +vect_t eq = VORRQ(eq1, eq2); #elif defined(FFCS_MASK) vect_t eq = VORRQ(data, vmask); @@ -226,7 +226,7 @@ if (p1 < *str_ptr) } else data2 = shift_left_n_lanes(data, offs1 - offs2); - + if (compare1_type == compare_match1) data = VCEQQ(data, cmp1a); else @@ -281,7 +281,7 @@ while (*str_ptr < str_end) #elif defined(FFCS_2) eq1 = VCEQQ(data, vc1); eq2 = VCEQQ(data, vc2); - eq = VORRQ(eq1, eq2); + eq = VORRQ(eq1, eq2); #elif defined(FFCS_MASK) eq = VORRQ(data, vmask); diff --git a/src/pcre2_jit_simd_inc.h b/src/pcre2_jit_simd_inc.h index 502977f..66e93cd 100644 --- a/src/pcre2_jit_simd_inc.h +++ b/src/pcre2_jit_simd_inc.h @@ -246,10 +246,10 @@ struct sljit_jump *quit; struct sljit_jump *partial_quit[2]; vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = 
sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); -sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0); -sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); -sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2); -sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3); +sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0); +sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1); +sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2); +sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3); sljit_u32 bit = 0; int i; @@ -273,17 +273,17 @@ if (common->mode == PCRE2_JIT_COMPLETE) /* First part (unaligned start) */ value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO; -sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); +sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); if (char1 != char2) - sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); + sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? 
bit : char2)); OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); -sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0); +sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0); if (char1 != char2) - sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0); + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 restart = LABEL(); @@ -294,12 +294,12 @@ OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value); value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; -sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); -sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); @@ -318,11 +318,11 @@ if (common->mode == PCRE2_JIT_COMPLETE) add_jump(compiler, &common->failed_match, partial_quit[1]); value = (reg_type == SLJIT_SIMD_REG_256) ? 
SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; -sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); -sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0); CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); JUMPHERE(quit); @@ -380,10 +380,10 @@ struct sljit_jump *quit; jump_list *not_found = NULL; vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); -sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0); -sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); -sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2); -sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3); +sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0); +sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1); +sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2); +sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3); sljit_u32 bit = 0; int i; @@ -406,29 +406,29 @@ OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); /* First part (unaligned start) */ value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO; -sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); +sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); if (char1 != char2) - sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? 
bit : char2)); + sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); -sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0); +sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0); if (char1 != char2) - sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0); + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0); value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf; OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value); value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; -sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); -sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); @@ -445,12 +445,12 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); value = (reg_type == SLJIT_SIMD_REG_256) ? 
SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; -sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0); for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); -sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0); CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); JUMPHERE(quit); @@ -488,14 +488,14 @@ sljit_u32 bit1 = 0; sljit_u32 bit2 = 0; sljit_u32 diff = IN_UCHARS(offs1 - offs2); sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); -sljit_s32 data1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0); -sljit_s32 data2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); -sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2); -sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3); -sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR4); -sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR5); -sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR6); -sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_TMP_FR0); +sljit_s32 data1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0); +sljit_s32 data2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1); +sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2); +sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3); +sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR4); +sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR5); +sljit_s32 tmp1_ind = 
sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR6); +sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG); struct sljit_label *start; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_label *restart; @@ -541,10 +541,10 @@ else } value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO; -sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, TMP1, 0); +sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, TMP1, 0); if (char1a != char1b) - sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR4, 0, TMP2, 0); + sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR4, 0, TMP2, 0); if (char2a == char2b) OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); @@ -566,18 +566,18 @@ else } } -sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR3, 0, TMP1, 0); +sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR3, 0, TMP1, 0); if (char2a != char2b) - sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR5, 0, TMP2, 0); + sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR5, 0, TMP2, 0); -sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0); +sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0); if (char1a != char1b) - sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR4, SLJIT_FR4, 0); + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR4, SLJIT_VR4, 0); -sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR3, SLJIT_FR3, 0); +sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR3, SLJIT_VR3, 0); if (char2a != char2b) - sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR5, SLJIT_FR5, 0); + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR5, SLJIT_VR5, 0); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 restart = LABEL(); @@ -589,11 +589,11 
@@ value = (reg_type == SLJIT_SIMD_REG_256) ? ~0x1f : ~0xf; OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; -sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0); jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0); -sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff); +sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff); jump[1] = JUMP(SLJIT_JUMP); JUMPHERE(jump[0]); @@ -668,8 +668,8 @@ for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); } -sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1); -sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0); /* Ignore matches before the first STR_PTR. */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); @@ -687,8 +687,8 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); value = (reg_type == SLJIT_SIMD_REG_256) ? 
SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; -sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); -sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff); +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0); +sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff); for (i = 0; i < 4; i++) { @@ -696,8 +696,8 @@ for (i = 0; i < 4; i++) fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind); } -sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1); -sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0); CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); @@ -843,12 +843,13 @@ DEFINE_COMPILER; int_char ic; struct sljit_jump *partial_quit, *quit; /* Save temporary registers. */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0); +SLJIT_ASSERT(common->locals_size >= 2 * (int)sizeof(sljit_sw)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP3, 0); /* Prepare function arguments */ OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); -GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0); +GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset); if (char1 == char2) @@ -910,8 +911,8 @@ else } } /* Restore registers. */ -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); -OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); +OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1); /* Check return value. 
*/ partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); @@ -1038,7 +1039,7 @@ SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); SLJIT_ASSERT(compiler->scratches == 5); /* Save temporary register STR_PTR. */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0); /* Prepare arguments for the function call. */ if (common->match_end_ptr == 0) @@ -1052,7 +1053,7 @@ else SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0); } -GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0); +GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0); OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1); OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2); ic.c.c1 = char1a; @@ -1093,7 +1094,7 @@ if (diff == 1) { } /* Restore STR_PTR register. */ -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0); /* Check return value. */ partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c index 6d95bb9..a7ade44 100644 --- a/src/pcre2_jit_test.c +++ b/src/pcre2_jit_test.c @@ -286,6 +286,7 @@ static struct regression_test_case regression_test_cases[] = { { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" }, { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" }, { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" }, + { M, A, 0, 0, "(?:a?|a)b", "ba" }, /* Greedy and non-greedy + operators */ { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" }, @@ -360,6 +361,7 @@ static struct regression_test_case regression_test_cases[] = { { MU, A, 0, 0, "(?P\\d+)m|M", "4M" }, { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" }, { 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "
    " }, + { MU, A, 0, 0, "([a-z]{0,3}c;)+", "ccccc;c;cc;ccc;cccccccccccccccc;" }, /* Bracket repeats with limit. */ { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" }, @@ -426,6 +428,7 @@ static struct regression_test_case regression_test_cases[] = { { MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" }, { MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" }, { MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" }, + { MUP, 0, 0, 0 | F_PROPERTY, "[\\x80-\\xff\\x{800}\\x{802}\\x{804}\\p{Cc}]", "\xdf\xbf\xe0\xa0\x80" }, /* Possible empty brackets. */ { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, @@ -472,10 +475,17 @@ static struct regression_test_case regression_test_cases[] = { { MU, A, 0, 0, "\\R+", "ab\r\n\r" }, { MU, A, 0, 0, "\\R*", "ab\r\n\r" }, { MU, A, 0, 0, "\\R*", "\r\n\r" }, + { M, A, 0, 0, "\\R+\x85", "\r\n\n\r#\r\x85\n" }, { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" }, { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" }, { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" }, { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" }, + { MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\r\r\nab\r\r\n\n" }, + { MU, A, 0, 0, "\\R{2,4}\n", "\r\n\nab\n\n\n\r\r\n" }, + { MU, A, 0, 0, "\\R{3,}\n", "\r\n\r\n\nab\n\n\n\r\r\n\n" }, + { MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\n" }, + { MU, A, 0, 0, "\\R{0,3}\n", "\r\n\r\n\r\n\r" }, + { MU, A, 0, 0, "(\\R{0,3}\n;)+", "\r\n\r\n\r\n\r\n\n;\n;\n\n;\n\n\n;\n\n\n\n\n;" }, { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" }, { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" }, { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" }, @@ -601,6 +611,7 @@ static struct regression_test_case regression_test_cases[] = { { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{1,3}M", "aaaaaaaabbbbaabbbbm" }, { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" }, { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "^(?P..)(?P..)\\k{2,4}", 
"AaAAAaAaAaaA" }, { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" }, { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" }, @@ -811,6 +822,8 @@ static struct regression_test_case regression_test_cases[] = { { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" }, { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" }, { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" }, + { M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P..)(?P..)\\k{2,4}", "AaAAAaAaAaA" }, + { M | PCRE2_DUPNAMES, A, PCRE2_PARTIAL_HARD, 0, "^(?P..)(?P..)\\k{2,4}", "AaAAAaAaAaa" }, /* (*MARK) verb. */ { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" }, diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c index ac8b63b..0474cc7 100644 --- a/src/pcre2_maketables.c +++ b/src/pcre2_maketables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -155,10 +155,10 @@ return yield; PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables) { - if (gcontext) - gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data); - else - free((void *)tables); +if (gcontext != NULL) + gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data); +else + free((void *)tables); } #endif diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 6c422c2..5adc034 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -155,17 +155,17 @@ changed, the code at RETURN_SWITCH below must be updated in sync. 
*/ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, - RM31, RM32, RM33, RM34, RM35, RM36, RM37 }; + RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39 }; #ifdef SUPPORT_WIDE_CHARS -enum { RM100=100, RM101 }; +enum { RM100=100, RM101, RM102, RM103 }; #endif #ifdef SUPPORT_UNICODE enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207, RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215, RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223, - RM224, RM225 }; + RM224 }; #endif /* Define short names for general fields in the current backtrack frame, which @@ -348,6 +348,7 @@ seems unlikely.) Arguments: offset index into the offset vector caseless TRUE if caseless + caseopts bitmask of REFI_FLAG_XYZ values F the current backtracking frame pointer mb points to match block lengthptr pointer for returning the length matched @@ -358,8 +359,8 @@ Returns: = 0 sucessful match; number of code units matched is set */ static int -match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb, - PCRE2_SIZE *lengthptr) +match_ref(PCRE2_SIZE offset, BOOL caseless, int caseopts, heapframe *F, + match_block *mb, PCRE2_SIZE *lengthptr) { PCRE2_SPTR p; PCRE2_SIZE length; @@ -389,6 +390,8 @@ if (caseless) { #if defined SUPPORT_UNICODE BOOL utf = (mb->poptions & PCRE2_UTF) != 0; + BOOL caseless_restrict = (caseopts & REFI_FLAG_CASELESS_RESTRICT) != 0; + BOOL turkish_casing = !caseless_restrict && (caseopts & REFI_FLAG_TURKISH_CASING) != 0; if (utf || (mb->poptions & PCRE2_UCP) != 0) { @@ -420,10 +423,20 @@ if (caseless) d = *p++; } - ur = GET_UCD(d); - if (c != d && c != (uint32_t)((int)d + ur->other_case)) + if (turkish_casing && UCD_ANY_I(d)) + { + c = UCD_FOLD_I_TURKISH(c); + d = UCD_FOLD_I_TURKISH(d); + if (c != d) return -1; /* No match */ + } + else if (c != d && c != (uint32_t)((int)d + (ur = GET_UCD(d))->other_case)) { 
const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset; + + /* When PCRE2_EXTRA_CASELESS_RESTRICT is set, ignore any caseless sets + that start with an ASCII character. */ + if (caseless_restrict && *pp < 128) return -1; /* No match */ + for (;;) { if (c < *pp) return -1; /* No match */ @@ -528,38 +541,46 @@ For hard partial matching, we immediately return a partial match. Otherwise, carrying on means that a complete match on the current subject will be sought. A partial match is returned only if no complete match can be found. */ -#define CHECK_PARTIAL()\ - if (Feptr >= mb->end_subject) \ - { \ - SCHECK_PARTIAL(); \ - } - -#define SCHECK_PARTIAL()\ - if (mb->partial != 0 && \ - (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ - { \ - mb->hitend = TRUE; \ - if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ - } +#define CHECK_PARTIAL() \ + do { \ + if (Feptr >= mb->end_subject) \ + { \ + SCHECK_PARTIAL(); \ + } \ + } \ + while (0) + +#define SCHECK_PARTIAL() \ + do { \ + if (mb->partial != 0 && \ + (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ + { \ + mb->hitend = TRUE; \ + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ + } \ + } \ + while (0) /* These macros are used to implement backtracking. They simulate a recursive call to the match() function by means of a local vector of frames which remember the backtracking points. */ -#define RMATCH(ra,rb)\ - {\ - start_ecode = ra;\ - Freturn_id = rb;\ - goto MATCH_RECURSE;\ - L_##rb:;\ - } +#define RMATCH(ra,rb) \ + do { \ + start_ecode = ra; \ + Freturn_id = rb; \ + goto MATCH_RECURSE; \ + L_##rb:; \ + } \ + while (0) -#define RRETURN(ra)\ - {\ - rrc = ra;\ - goto RETURN_SWITCH;\ - } +#define RRETURN(ra) \ + do { \ + rrc = ra; \ + goto RETURN_SWITCH; \ + } \ + while (0) @@ -813,7 +834,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, offset = Flast_group_offset; for(;;) { + /* Corrupted heapframes?. 
Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (N->group_frame_type == (GF_CAPTURE | number)) break; @@ -852,7 +876,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, offset = Flast_group_offset; for(;;) { + /* Corrupted heapframes?. Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break; @@ -1329,7 +1356,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } else /* Maximize */ @@ -1430,7 +1457,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH); Feptr++; } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } else /* Maximize */ @@ -1488,7 +1515,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } else /* Maximize */ { @@ -1706,7 +1733,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, Feptr++; } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* Maximize case */ @@ -1844,7 +1871,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* Maximize case */ @@ 
-1928,7 +1955,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #define Lmax F->temp_32[1] #define Lstart_eptr F->temp_sptr[0] #define Lbyte_map_address F->temp_sptr[1] -#define Lbyte_map ((unsigned char *)Lbyte_map_address) +#define Lbyte_map ((const unsigned char *)Lbyte_map_address) case OP_NCLASS: case OP_CLASS: @@ -2071,7 +2098,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* If maximizing, find the longest possible run, then work backwards. */ @@ -2151,7 +2178,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } } - /* Control never gets here */ + + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lbyte_map_address #undef Lbyte_map @@ -2219,7 +2247,9 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH); + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); } /* If Lmax == Lmin we can just continue with the main loop. */ @@ -2242,9 +2272,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH); + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* If maximizing, find the longest possible run, then work backwards. 
*/ @@ -2265,7 +2297,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #else fc = *Feptr; #endif - if (!PRIV(xclass)(fc, Lxclass_data, utf)) break; + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) break; Feptr += len; } @@ -2287,7 +2320,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ } #endif /* SUPPORT_WIDE_CHARS: end of XCLASS */ @@ -2297,6 +2330,151 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #undef Lmax + /* ===================================================================== */ + /* Match a complex, set-based character class. This opcodes are used when + there is complex nesting or logical operations within the character + class. */ + +#define Lstart_eptr F->temp_sptr[0] +#define Leclass_data F->temp_sptr[1] +#define Leclass_len F->temp_size +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] + +#ifdef SUPPORT_WIDE_CHARS + case OP_ECLASS: + { + Leclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */ + Fecode += GET(Fecode, 1); /* Advance past the item */ + Leclass_len = (PCRE2_SIZE)(Fecode - Leclass_data); + + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + reptype = rep_typ[*Fecode - OP_CRSTAR]; + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + Lmin = Lmax = 1; + break; + } + + /* First, ensure the minimum number of matches are present. 
*/ + + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + + /* If Lmax == Lmin we can just continue with the main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM102); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } +#ifdef SUPPORT_UNICODE + GETCHARLENTEST(fc, Feptr, len); +#else + fc = *Feptr; +#endif + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + break; + Feptr += len; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. 
*/ + + for(;;) + { + RMATCH(Fecode, RM103); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ +#ifdef SUPPORT_UNICODE + if (utf) BACKCHAR(Feptr); +#endif + } + RRETURN(MATCH_NOMATCH); + } + + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } +#endif /* SUPPORT_WIDE_CHARS: end of ECLASS */ + +#undef Lstart_eptr +#undef Leclass_data +#undef Leclass_len +#undef Lmin +#undef Lmax + + /* ===================================================================== */ /* Match various character types when PCRE2_UCP is not set. These opcodes are not generated when PCRE2_UCP is set - instead appropriate property @@ -2492,10 +2670,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, switch(Fecode[1]) { - case PT_ANY: - if (notmatch) RRETURN(MATCH_NOMATCH); - break; - case PT_LAMP: chartype = prop->chartype; if ((chartype == ucp_Lu || @@ -2606,6 +2780,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, /* This should never occur */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -2728,19 +2903,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { - case PT_ANY: - if (notmatch) RRETURN(MATCH_NOMATCH); - for (i = 1; i <= Lmin; i++) - { - if (Feptr >= mb->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(fc, Feptr); - } - break; - case PT_LAMP: for (i = 1; i <= Lmin; i++) { @@ -2969,6 +3131,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, /* This should not occur */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -3244,6 +3407,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } /* End switch(Lctype) */ @@ -3496,6 +3660,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - 
mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -3516,27 +3681,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { switch(proptype) { - case PT_ANY: - for (;;) - { - RMATCH(Fecode, RM208); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); - if (Feptr >= mb->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - GETCHARINCTEST(fc, Feptr); - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); - } - /* Control never gets here */ - case PT_LAMP: for (;;) { int chartype; - RMATCH(Fecode, RM209); + RMATCH(Fecode, RM208); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3551,12 +3700,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_GC: for (;;) { - RMATCH(Fecode, RM210); + RMATCH(Fecode, RM209); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3568,12 +3717,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_PC: for (;;) { - RMATCH(Fecode, RM211); + RMATCH(Fecode, RM210); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3585,12 +3734,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_SC: for (;;) { - RMATCH(Fecode, 
RM212); + RMATCH(Fecode, RM211); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3602,14 +3751,14 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_SCX: for (;;) { BOOL ok; const ucd_record *prop; - RMATCH(Fecode, RM225); + RMATCH(Fecode, RM224); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3624,13 +3773,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (ok == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_ALNUM: for (;;) { int category; - RMATCH(Fecode, RM213); + RMATCH(Fecode, RM212); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3643,7 +3792,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ /* Perl space used to exclude VT, but from Perl 5.18 it is included, which means that Perl space and POSIX space are now identical. 
PCRE @@ -3653,7 +3802,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case PT_PXSPACE: /* POSIX space */ for (;;) { - RMATCH(Fecode, RM214); + RMATCH(Fecode, RM213); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3675,13 +3824,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_WORD: for (;;) { int chartype, category; - RMATCH(Fecode, RM215); + RMATCH(Fecode, RM214); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3698,13 +3847,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, chartype == ucp_Pc) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_CLIST: for (;;) { const uint32_t *cp; - RMATCH(Fecode, RM216); + RMATCH(Fecode, RM215); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3735,12 +3884,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } } } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_UCNC: for (;;) { - RMATCH(Fecode, RM217); + RMATCH(Fecode, RM216); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3754,12 +3903,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, fc >= 0xe000) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_BIDICL: for (;;) { - RMATCH(Fecode, RM224); + RMATCH(Fecode, RM223); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= 
mb->end_subject) @@ -3771,14 +3920,14 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ case PT_BOOL: for (;;) { BOOL ok; const ucd_record *prop; - RMATCH(Fecode, RM223); + RMATCH(Fecode, RM222); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3793,10 +3942,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (ok == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ /* This should never occur */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -3808,7 +3958,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { for (;;) { - RMATCH(Fecode, RM218); + RMATCH(Fecode, RM217); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3835,7 +3985,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, { for (;;) { - RMATCH(Fecode, RM219); + RMATCH(Fecode, RM218); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); if (Feptr >= mb->end_subject) @@ -3951,6 +4101,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } @@ -4095,11 +4246,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } } } - /* Control never gets here */ + + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ } /* If maximizing, it is worth using inline code for speed, doing the type @@ -4117,21 +4270,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", 
Fecode - mb->start_code, *Fecode, BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { - case PT_ANY: - for (i = Lmin; i < Lmax; i++) - { - int len = 1; - if (Feptr >= mb->end_subject) - { - SCHECK_PARTIAL(); - break; - } - GETCHARLENTEST(fc, Feptr, len); - if (notmatch) break; - Feptr+= len; - } - break; - case PT_LAMP: for (i = Lmin; i < Lmax; i++) { @@ -4377,6 +4515,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -4391,7 +4530,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for(;;) { if (Feptr <= Lstart_eptr) break; - RMATCH(Fecode, RM222); + RMATCH(Fecode, RM221); if (rrc != MATCH_NOMATCH) RRETURN(rrc); Feptr--; if (utf) BACKCHAR(Feptr); @@ -4434,7 +4573,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, PCRE2_SPTR fptr; if (Feptr <= Lstart_eptr) break; /* At start of char run */ - RMATCH(Fecode, RM220); + RMATCH(Fecode, RM219); if (rrc != MATCH_NOMATCH) RRETURN(rrc); /* Backtracking over an extended grapheme cluster involves inspecting @@ -4695,6 +4834,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -4707,7 +4847,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for(;;) { if (Feptr <= Lstart_eptr) break; - RMATCH(Fecode, RM221); + RMATCH(Fecode, RM220); if (rrc != MATCH_NOMATCH) RRETURN(rrc); Feptr--; BACKCHAR(Feptr); @@ -4952,6 +5092,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -4988,16 +5129,18 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #define Lmin F->temp_32[0] #define Lmax F->temp_32[1] #define Lcaseless F->temp_32[2] +#define Lcaseopts F->temp_32[3] #define Lstart F->temp_sptr[0] #define Loffset F->temp_size 
case OP_DNREF: case OP_DNREFI: Lcaseless = (Fop == OP_DNREFI); + Lcaseopts = (Fop == OP_DNREFI)? Fecode[1 + 2*IMM2_SIZE] : 0; { int count = GET2(Fecode, 1+IMM2_SIZE); PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; - Fecode += 1 + 2*IMM2_SIZE; + Fecode += 1 + 2*IMM2_SIZE + (Fop == OP_DNREFI? 1 : 0); while (count-- > 0) { @@ -5011,8 +5154,9 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case OP_REF: case OP_REFI: Lcaseless = (Fop == OP_REFI); + Lcaseopts = (Fop == OP_REFI)? Fecode[1 + IMM2_SIZE] : 0; Loffset = (GET2(Fecode, 1) << 1) - 2; - Fecode += 1 + IMM2_SIZE; + Fecode += 1 + IMM2_SIZE + (Fop == OP_REFI? 1 : 0); /* Set up for repetition, or handle the non-repeated case. The maximum and minimum must be in the heap frame, but as they are short-term values, we @@ -5044,7 +5188,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, default: /* No repeat follows */ { - rrc = match_ref(Loffset, Lcaseless, F, mb, &length); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &length); if (rrc != 0) { if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ @@ -5078,7 +5222,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for (i = 1; i <= Lmin; i++) { PCRE2_SIZE slength; - rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); if (rrc != 0) { if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ @@ -5102,7 +5246,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RMATCH(Fecode, RM20); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); - rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); if (rrc != 0) { if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ @@ -5111,7 +5255,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } Feptr += 
slength; } - /* Control never gets here */ + + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* If maximizing, find the longest string and work backwards, as long as @@ -5126,7 +5271,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for (i = Lmin; i < Lmax; i++) { PCRE2_SIZE slength; - rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); if (rrc != 0) { /* Can't use CHECK_PARTIAL because we don't want to update Feptr in @@ -5177,7 +5322,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, for (i = Lmin; i < Lmax; i++) { PCRE2_SIZE slength; - (void)match_ref(Loffset, Lcaseless, F, mb, &slength); + (void)match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); Feptr += slength; } } @@ -5185,7 +5330,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } - /* Control never gets here */ + + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ #undef Lcaseless #undef Lmin @@ -5409,7 +5555,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, Fecode += GET(Fecode, 1); if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH); } - /* Control never reaches here. */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lframe_type @@ -5494,7 +5640,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, Lstart_branch = next_ecode; if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH); } - /* Control never reaches here. */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lframe_type #undef Lstart_branch @@ -5585,6 +5731,136 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #undef Lframe_type + /* ===================================================================== */ + /* Handle scan substring operation. 
*/ + +#define Lframe_type F->temp_32[0] +#define Lextra_size F->temp_32[1] +#define Lsaved_moptions F->temp_32[2] +#define Lsaved_end_subject F->temp_sptr[0] +#define Lsaved_eptr F->temp_sptr[1] +#define Ltrue_end_extra F->temp_size + + case OP_ASSERT_SCS: + { + PCRE2_SPTR ecode = Fecode + 1 + LINK_SIZE; + uint32_t extra_size = 0; + int count; + PCRE2_SPTR slot; + + /* Disable compiler warning. */ + offset = 0; + (void)offset; + + for (;;) + { + if (*ecode == OP_CREF) + { + extra_size += 1+IMM2_SIZE; + offset = (GET2(ecode, 1) << 1) - 2; + ecode += 1+IMM2_SIZE; + if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET) + goto SCS_OFFSET_FOUND; + continue; + } + + if (*ecode != OP_DNCREF) RRETURN(MATCH_NOMATCH); + + count = GET2(ecode, 1 + IMM2_SIZE); + slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size; + extra_size += 1+2*IMM2_SIZE; + ecode += 1+2*IMM2_SIZE; + + while (count > 0) + { + offset = (GET2(slot, 0) << 1) - 2; + if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET) + goto SCS_OFFSET_FOUND; + slot += mb->name_entry_size; + count--; + } + } + + SCS_OFFSET_FOUND: + + /* Skip remaining options. 
*/ + for (;;) + { + if (*ecode == OP_CREF) + { + extra_size += 1+IMM2_SIZE; + ecode += 1+IMM2_SIZE; + } + else if (*ecode == OP_DNCREF) + { + extra_size += 1+2*IMM2_SIZE; + ecode += 1+2*IMM2_SIZE; + } + else break; + } + + Lextra_size = extra_size; + } + + Lsaved_end_subject = mb->end_subject; + Ltrue_end_extra = mb->true_end_subject - mb->end_subject; + Lsaved_eptr = Feptr; + Lsaved_moptions = mb->moptions; + + Feptr = mb->start_subject + Fovector[offset]; + mb->true_end_subject = mb->end_subject = + mb->start_subject + Fovector[offset + 1]; + mb->moptions &= ~PCRE2_NOTEOL; + + Lframe_type = GF_NOCAPTURE | Fop; + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + 1 + LINK_SIZE + Lextra_size, RM38); + if (rrc == MATCH_ACCEPT) + { + memcpy(Fovector, + (char *)assert_accept_frame + offsetof(heapframe, ovector), + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + Fmark = assert_accept_frame->mark; + /* Put back the saved subject limits and options, as the failure exits below do. */ + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; + break; + } + + if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) + { + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; + RRETURN(rrc); + } + + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) + { + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; + RRETURN(MATCH_NOMATCH); + } + Lextra_size = 0; + } + + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + Fecode += 1 + LINK_SIZE; + Feptr = Lsaved_eptr; + break; + +#undef Lframe_type +#undef Lextra_size +#undef Lsaved_end_subject +#undef Lsaved_eptr +#undef Ltrue_end_extra +#undef Lsaved_moptions /* ===================================================================== */ /* The 
callout item calls an external function, if one is provided, passing @@ -5795,8 +6067,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, #ifdef SUPPORT_UNICODE if (utf) { - while
(number-- > 0) + /* We used to do a simpler `while (number-- > 0)` but that triggers + clang's unsigned integer overflow sanitizer. */ + while (number > 0) { + --number; if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH); Feptr--; BACKCHAR(Feptr); @@ -5869,7 +6144,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } /* Now try matching, moving forward one character on failure, until we - reach the mimimum back length. */ + reach the minimum back length. */ for (;;) { @@ -5881,7 +6156,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); } #endif } - /* Control never reaches here */ + PCRE2_UNREACHABLE(); /* Control never reaches here */ #undef Lmin #undef Lmax @@ -5931,14 +6206,20 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, (char *)P->eptr - (char *)mb->start_subject); #endif - /* If we are at the end of an assertion that is a condition, return a - match, discarding any intermediate backtracking points. Copy back the - mark setting and the captures into the frame before N so that they are - set on return. Doing this for all assertions, both positive and negative, - seems to match what Perl does. */ + /* If we are at the end of an assertion that is a condition, first check + to see if we are at the end of a variable-length branch in a lookbehind. + If this is the case and we have not landed on the current character, + return no match. Compare code below for non-condition lookbehinds. In + other cases, return a match, discarding any intermediate backtracking + points. Copy back the mark setting and the captures into the frame before + N so that they are set on return. Doing this for all assertions, both + positive and negative, seems to match what Perl does. 
*/ if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT) { + if ((*bracode == OP_ASSERTBACK || *bracode == OP_ASSERTBACK_NOT) && + branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); memcpy((char *)P + offsetof(heapframe, ovector), Fovector, Foffset_top * sizeof(PCRE2_SIZE)); P->offset_top = Foffset_top; @@ -5967,7 +6248,11 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, /* It is the end of whole-pattern recursion. */ offset = Flast_group_offset; + + /* Corrupted heapframes?. Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); Flast_group_offset = P->last_group_offset; @@ -6042,6 +6327,23 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case OP_ASSERT_NOT: RRETURN(MATCH_MATCH); + /* A scan substring group must preserve the current end_subject, + and restore it before the backtracking is performed into its sub + pattern. */ + + case OP_ASSERT_SCS: + F->temp_sptr[0] = mb->end_subject; + mb->end_subject = P->temp_sptr[0]; + mb->true_end_subject = mb->end_subject + P->temp_size; + Feptr = P->temp_sptr[1]; + + RMATCH(Fecode + 1 + LINK_SIZE, RM39); + + mb->end_subject = F->temp_sptr[0]; + mb->true_end_subject = mb->end_subject; + RRETURN(rrc); + break; + /* At the end of a script run, apply the script-checking rules. This code will never by exercised if Unicode support it not compiled, because in that environment script runs cause an error at compile time. 
*/ @@ -6165,8 +6467,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, case OP_EODN: ASSERT_NL_OR_EOS: - if (Feptr < mb->end_subject && - (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen)) + if (Feptr < mb->true_end_subject && + (!IS_NEWLINE(Feptr) || Feptr != mb->true_end_subject - mb->nllen)) { if (mb->partial != 0 && Feptr + 1 >= mb->end_subject && @@ -6447,6 +6749,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, something seriously wrong in the code above or the OP_xxx definitions. */ default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } @@ -6455,8 +6758,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, loop. */ } /* End of main loop */ -/* Control never reaches here */ +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ /* ========================================================================= */ /* The RRETURN() macro jumps here. The number that is saved in Freturn_id @@ -6482,20 +6785,21 @@ switch (Freturn_id) LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) - LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) + LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) #ifdef SUPPORT_WIDE_CHARS - LBL(100) LBL(101) + LBL(100) LBL(101) LBL(102) LBL(103) #endif #ifdef SUPPORT_UNICODE LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206) LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213) LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220) - LBL(221) LBL(222) LBL(223) LBL(224) LBL(225) + LBL(221) LBL(222) LBL(223) LBL(224) #endif default: + PCRE2_DEBUG_UNREACHABLE(); return PCRE2_ERROR_INTERNAL; } #undef LBL @@ -6621,7 +6925,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the options variable 
for this function. Users of PCRE2 who are not calling the function directly would like to have a way of setting these flags, in the same -way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with +way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which we now transfer to the options for this function. The bits are guaranteed to be @@ -6703,9 +7007,6 @@ if (use_jit) #ifdef SUPPORT_UNICODE if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid) { -#if PCRE2_CODE_UNIT_WIDTH != 32 - unsigned int i; -#endif /* For 8-bit and 16-bit UTF, check that the first code unit is a valid character start. */ @@ -6726,7 +7027,7 @@ if (use_jit) start of matching. */ #if PCRE2_CODE_UNIT_WIDTH != 32 - for (i = re->max_lookbehind; i > 0 && start_match > subject; i--) + for (unsigned int i = re->max_lookbehind; i > 0 && start_match > subject; i--) { start_match--; while (start_match > subject && @@ -6973,10 +7274,10 @@ mb->mark = mb->nomatch_mark = NULL; /* In case never set */ /* The name table is needed for finding all the numbers associated with a given name, for condition testing. The code follows the name table. */ -mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)); +mb->name_table = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)); mb->name_count = re->name_count; mb->name_entry_size = re->name_entry_size; -mb->start_code = mb->name_table + re->name_count * re->name_entry_size; +mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start); /* Process the \R and newline settings. 
*/ @@ -7013,7 +7314,9 @@ switch(re->newline_convention) mb->nltype = NLTYPE_ANYCRLF; break; - default: return PCRE2_ERROR_INTERNAL; + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; } /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE @@ -7159,7 +7462,7 @@ for(;;) However, there is an option (settable at compile time) that disables these, for testing and for ensuring that all callouts do actually occur. */ - if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) { /* If firstline is TRUE, the start of the match is constrained to the first line of a multiline string. That is, the match must be before or at the diff --git a/src/pcre2_match_data.c b/src/pcre2_match_data.c index 757dab9..100e7c9 100644 --- a/src/pcre2_match_data.c +++ b/src/pcre2_match_data.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -77,14 +77,16 @@ return yield; * Create a match data block using pattern data * *************************************************/ -/* If no context is supplied, use the memory allocator from the code. */ +/* If no context is supplied, use the memory allocator from the code. This code +assumes that a general context contains nothing other than a memory allocator. +If that ever changes, this code will need fixing. 
*/ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION pcre2_match_data_create_from_pattern(const pcre2_code *code, pcre2_general_context *gcontext) { if (gcontext == NULL) gcontext = (pcre2_general_context *)code; -return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, +return pcre2_match_data_create(((const pcre2_real_code *)code)->top_bracket + 1, gcontext); } diff --git a/src/pcre2_ord2utf.c b/src/pcre2_ord2utf.c index 1403730..a1e9e08 100644 --- a/src/pcre2_ord2utf.c +++ b/src/pcre2_ord2utf.c @@ -117,4 +117,4 @@ return 1; } #endif /* SUPPORT_UNICODE */ -/* End of pcre_ord2utf.c */ +/* End of pcre2_ord2utf.c */ diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c index a29f5ef..fe4d3c6 100644 --- a/src/pcre2_pattern_info.c +++ b/src/pcre2_pattern_info.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -64,7 +64,7 @@ Returns: 0 when data returned PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) { -const pcre2_real_code *re = (pcre2_real_code *)code; +const pcre2_real_code *re = (const pcre2_real_code *)code; if (where == NULL) /* Requests field length */ { @@ -230,7 +230,8 @@ switch(what) break; case PCRE2_INFO_NAMETABLE: - *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); + *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re + + sizeof(pcre2_real_code)); break; case PCRE2_INFO_NEWLINE: @@ -268,7 +269,7 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_callout_enumerate(const pcre2_code *code, int (*callback)(pcre2_callout_enumerate_block 
*, void *), void *callout_data) { -pcre2_real_code *re = (pcre2_real_code *)code; +const pcre2_real_code *re = (const pcre2_real_code *)code; pcre2_callout_enumerate_block cb; PCRE2_SPTR cc; #ifdef SUPPORT_UNICODE @@ -291,7 +292,7 @@ if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; cb.version = 0; -cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) +cc = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) + re->name_count * re->name_entry_size; while (TRUE) @@ -383,8 +384,9 @@ while (TRUE) #endif break; -#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 +#ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: cc += GET(cc, 1); break; #endif diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index 870e283..84f84f8 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -53,6 +53,7 @@ pcre2_internal.h, which is #included by pcre2test before this file. 
*/ #ifndef OP_LISTS_DEFINED static const char *OP_names[] = { OP_NAME_LIST }; +STATIC_ASSERT(sizeof(OP_names)/sizeof(*OP_names) == OP_TABLE_LENGTH, OP_names); #define OP_LISTS_DEFINED #endif @@ -65,13 +66,17 @@ static const char *OP_names[] = { OP_NAME_LIST }; #define print_custring PCRE2_SUFFIX(print_custring_) #define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_) #define print_prop PCRE2_SUFFIX(print_prop_) +#define print_char_list PCRE2_SUFFIX(print_char_list_) +#define print_map PCRE2_SUFFIX(print_map_) +#define print_class PCRE2_SUFFIX(print_class_) /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that the definition is next to the definition of the opcodes in pcre2_internal.h. The contents of the table are, however, mode-dependent. */ static const uint8_t OP_lengths[] = { OP_LENGTHS }; - +STATIC_ASSERT(sizeof(OP_lengths)/sizeof(*OP_lengths) == OP_TABLE_LENGTH, + PCRE2_SUFFIX(OP_lengths_)); /************************************************* @@ -245,7 +250,7 @@ const char *yield = "??"; size_t len = 0; unsigned int ptypex = (ptype == PT_SC)? PT_SCX : ptype; -for (int i = PRIV(utt_size) - 1; i >= 0; i--) +for (ptrdiff_t i = PRIV(utt_size) - 1; i >= 0; i--) { const ucp_type_table *u = PRIV(utt) + i; @@ -318,6 +323,298 @@ else +/************************************************* +* Print character list * +*************************************************/ + +/* Prints the characters and character ranges in a character list. + +Arguments: + f file to write to + code pointer in the compiled code +*/ + +static PCRE2_SPTR +print_char_list(FILE *f, PCRE2_SPTR code, const uint8_t *char_lists_end) +{ +uint32_t type, list_ind; +uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD; +uint32_t range_start = ~(uint32_t)0, range_end = 0; +const uint8_t *next_char; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +type = (uint32_t)(code[0] << 8) | code[1]; +code += 2; +#else +type = code[0]; +code++; +#endif /* CODE_UNIT_WIDTH */ + +/* Align characters. 
*/ +next_char = char_lists_end - (GET(code, 0) << 1); +type &= XCL_TYPE_MASK; +list_ind = 0; + +if ((type & XCL_BEGIN_WITH_RANGE) != 0) + range_start = XCL_CHAR_LIST_LOW_16_START; + +while (type > 0) + { + uint32_t item_count = type & XCL_ITEM_COUNT_MASK; + + if (item_count == XCL_ITEM_COUNT_MASK) + { + if (list_ind <= 1) + { + item_count = *(const uint16_t*)next_char; + next_char += 2; + } + else + { + item_count = *(const uint32_t*)next_char; + next_char += 4; + } + } + + while (item_count > 0) + { + if (list_ind <= 1) + { + range_end = *(const uint16_t*)next_char; + next_char += 2; + } + else + { + range_end = *(const uint32_t*)next_char; + next_char += 4; + } + + if ((range_end & XCL_CHAR_END) != 0) + { + range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + if (range_start < range_end) + fprintf(f, "\\x{%x}-", range_start); + + fprintf(f, "\\x{%x}", range_end); + range_start = ~(uint32_t)0; + } + else + range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + item_count--; + } + + list_ind++; + type >>= XCL_TYPE_BIT_LEN; + + /* The following code could be optimized to 8/16/32 bit, + but it is not worth it for a debugging function. 
*/ + + if (range_start == ~(uint32_t)0) + { + if ((type & XCL_BEGIN_WITH_RANGE) != 0) + { + if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START; + else if (list_ind == 2) range_start = XCL_CHAR_LIST_LOW_32_START; + else range_start = XCL_CHAR_LIST_HIGH_32_START; + } + } + else if ((type & XCL_BEGIN_WITH_RANGE) == 0) + { + fprintf(f, "\\x{%x}-", range_start); + + if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END; + else if (list_ind == 2) range_end = XCL_CHAR_LIST_HIGH_16_END; + else if (list_ind == 3) range_end = XCL_CHAR_LIST_LOW_32_END; + else range_end = XCL_CHAR_LIST_HIGH_32_END; + + fprintf(f, "\\x{%x}", range_end); + range_start = ~(uint32_t)0; + } + + if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD; + else if (list_ind == 2) char_list_add = XCL_CHAR_LIST_LOW_32_ADD; + else char_list_add = XCL_CHAR_LIST_HIGH_32_ADD; + } + +return code + LINK_SIZE; +} + + + +/************************************************* +* Print a character bitmap * +*************************************************/ + +/* Prints a 32-byte bitmap, which occurs within a character class opcode. + +Arguments: + f file to write to + map pointer to the bitmap + negated TRUE if the bitmap will be printed as negated + +Returns: nothing +*/ + +static void +print_map(FILE *f, const uint8_t *map, BOOL negated) +{ +BOOL first = TRUE; +uint8_t inverted_map[32]; +int i; + +if (negated) + { + /* Using 255 ^ instead of ~ avoids clang sanitize warning. 
*/ + for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i]; + map = inverted_map; + } + +for (i = 0; i < 256; i++) + { + if ((map[i/8] & (1u << (i&7))) != 0) + { + int j; + for (j = i+1; j < 256; j++) + if ((map[j/8] & (1u << (j&7))) == 0) break; + if (i == '-' || i == '\\' || i == ']' || (first && i == '^')) + fprintf(f, "\\"); + if (PRINTABLE(i)) fprintf(f, "%c", i); + else fprintf(f, "\\x%02x", i); + first = FALSE; + if (--j > i) + { + if (j != i + 1) fprintf(f, "-"); + if (j == '-' || j == '\\' || j == ']') fprintf(f, "\\"); + if (PRINTABLE(j)) fprintf(f, "%c", j); + else fprintf(f, "\\x%02x", j); + } + i = j; + } + } +} + + + +/************************************************* +* Print character class * +*************************************************/ + +/* Prints a character class, which must be either an OP_CLASS, OP_NCLASS, or +OP_XCLASS. + +Arguments: + f file to write to + type OP_CLASS, OP_NCLASS, or OP_XCLASS + code pointer in the compiled code (after the OP tag) + utf TRUE if re is UTF (will be FALSE if UTF is not supported) + before text to print before + after text to print after + +Returns: nothing +*/ + +static void +print_class(FILE *f, int type, PCRE2_SPTR code, const uint8_t *char_lists_end, + BOOL utf, const char *before, const char *after) +{ +BOOL printmap, negated; +PCRE2_SPTR ccode; + +/* Negative XCLASS and NCLASS both have a bitmap indicating which characters +are accepted. For clarity we print this inverted and prefixed by "^". */ +if (type == OP_XCLASS) + { + ccode = code + LINK_SIZE; + printmap = (*ccode & XCL_MAP) != 0; + negated = (*ccode & XCL_NOT) != 0; + ccode++; + } +else /* CLASS or NCLASS */ + { + printmap = TRUE; + negated = type == OP_NCLASS; + ccode = code; + } + +fprintf(f, "%s[%s", before, negated? 
"^" : ""); + +/* Print a bit map */ +if (printmap) + { + print_map(f, (const uint8_t *)ccode, negated); + ccode += 32 / sizeof(PCRE2_UCHAR); + } + +/* For an XCLASS there is always some additional data */ +if (type == OP_XCLASS) + { + PCRE2_UCHAR ch; + + while ((ch = *ccode++) != XCL_END) + { + const char *notch = ""; + + if (ch >= XCL_LIST) + { + ccode = print_char_list(f, ccode - 1, char_lists_end); + break; + } + + switch(ch) + { + case XCL_NOTPROP: + notch = "^"; + /* Fall through */ + case XCL_PROP: + { + unsigned int ptype = *ccode++; + unsigned int pvalue = *ccode++; + const char *s; + switch(ptype) + { + case PT_PXGRAPH: + fprintf(f, "[:%sgraph:]", notch); + break; + case PT_PXPRINT: + fprintf(f, "[:%sprint:]", notch); + break; + case PT_PXPUNCT: + fprintf(f, "[:%spunct:]", notch); + break; + case PT_PXXDIGIT: + fprintf(f, "[:%sxdigit:]", notch); + break; + default: + s = get_ucpname(ptype, pvalue); + fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 'P':'p'), + toupper(s[0]), s+1); + break; + } + } + break; + + default: + ccode += 1 + print_char(f, ccode, utf); + if (ch == XCL_RANGE) + { + fprintf(f, "-"); + ccode += 1 + print_char(f, ccode, utf); + } + break; + } + } + + PCRE2_ASSERT(ccode == code + (GET(code, 0) - 1)); + } + +/* Indicate a non-UTF class which was created by negation */ +fprintf(f, "]%s", after); +} + + + /************************************************* * Print compiled pattern * *************************************************/ @@ -342,7 +639,7 @@ uint32_t nesize = re->name_entry_size; BOOL utf = (re->overall_options & PCRE2_UTF) != 0; nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); -code = codestart = nametable + re->name_count * re->name_entry_size; +code = codestart = (PCRE2_SPTR)((uint8_t *)re + re->code_start); for(;;) { @@ -359,20 +656,6 @@ for(;;) switch(*code) { -/* ========================================================================== */ - /* These cases are never obeyed. 
This is a fudge that causes a compile- - time error if the vectors OP_names or OP_lengths, which are indexed - by opcode, are not the correct length. It seems to be the only way to do - such a check at compile time, as the sizeof() operator does not work in - the C preprocessor. */ - - case OP_TABLE_LENGTH: - case OP_TABLE_LENGTH + - ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && - (sizeof(OP_lengths) == OP_TABLE_LENGTH)): - return; -/* ========================================================================== */ - case OP_END: fprintf(f, " %s\n", OP_names[*code]); fprintf(f, "------------------------------------------------------------------\n"); @@ -424,6 +707,7 @@ for(;;) case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: case OP_ONCE: case OP_SCRIPT_RUN: case OP_COND: @@ -457,7 +741,7 @@ for(;;) case OP_DNCREF: { PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE; - fprintf(f, " %s Cond ref <", flag); + fprintf(f, " %s Capture ref <", flag); print_custring(f, entry); fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); } @@ -574,7 +858,7 @@ for(;;) case OP_NOT: fprintf(f, " %s [^", flag); extra = print_char(f, code + 1, utf); - fprintf(f, "]"); + fprintf(f, "] (not)"); break; case OP_NOTSTARI: @@ -600,7 +884,7 @@ for(;;) case OP_NOTPOSQUERY: fprintf(f, " %s [^", flag); extra = print_char(f, code + 1, utf); - fprintf(f, "]%s", OP_names[*code]); + fprintf(f, "]%s (not)", OP_names[*code]); break; case OP_NOTEXACTI: @@ -622,6 +906,7 @@ for(;;) if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); else if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); + fprintf(f, " (not)"); break; case OP_RECURSE: @@ -632,14 +917,17 @@ for(;;) case OP_REFI: flag = "/i"; + extra = code[1 + IMM2_SIZE]; /* Fall through */ case OP_REF: fprintf(f, " %s \\%d", flag, GET2(code,1)); + if (extra != 0) fprintf(f, " 0x%02x", extra); ccode = code + OP_lengths[*code]; goto CLASS_REF_REPEAT; 
case OP_DNREFI: flag = "/i"; + extra = code[1 + 2*IMM2_SIZE]; /* Fall through */ case OP_DNREF: { @@ -647,6 +935,7 @@ for(;;) fprintf(f, " %s \\k<", flag); print_custring(f, entry); fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + if (extra != 0) fprintf(f, " 0x%02x", extra); } ccode = code + OP_lengths[*code]; goto CLASS_REF_REPEAT; @@ -676,141 +965,77 @@ for(;;) print_prop(f, code, " ", ""); break; - /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm - in having this code always here, and it makes it less messy without all - those #ifdefs. */ - - case OP_CLASS: - case OP_NCLASS: - case OP_XCLASS: +#ifdef SUPPORT_WIDE_CHARS + case OP_ECLASS: + extra = GET(code, 1); + fprintf(f, " eclass[\n"); + /* We print the opcodes contained inside as well. */ + ccode = code + 1 + LINK_SIZE + 1; + if ((ccode[-1] & ECL_MAP) != 0) { - BOOL printmap, invertmap; - - fprintf(f, " ["); - - /* Negative XCLASS has an inverted map whereas the original opcodes have - already done the inversion. */ - - invertmap = FALSE; - if (*code == OP_XCLASS) - { - extra = GET(code, 1); - ccode = code + LINK_SIZE + 1; - printmap = (*ccode & XCL_MAP) != 0; - if ((*ccode & XCL_NOT) != 0) - { - invertmap = (*ccode & XCL_HASPROP) == 0; - fprintf(f, "^"); - } - ccode++; - } - else /* CLASS or NCLASS */ - { - printmap = TRUE; - ccode = code + 1; - } - - /* Print a bit map */ - - if (printmap) - { - uint8_t inverted_map[32]; - uint8_t *map = (uint8_t *)ccode; - - if (invertmap) - { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. 
*/ - for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i]; - map = inverted_map; - } - - for (i = 0; i < 256; i++) - { - if ((map[i/8] & (1u << (i&7))) != 0) - { - int j; - for (j = i+1; j < 256; j++) - if ((map[j/8] & (1u << (j&7))) == 0) break; - if (i == '-' || i == ']') fprintf(f, "\\"); - if (PRINTABLE(i)) fprintf(f, "%c", i); - else fprintf(f, "\\x%02x", i); - if (--j > i) - { - if (j != i + 1) fprintf(f, "-"); - if (j == '-' || j == ']') fprintf(f, "\\"); - if (PRINTABLE(j)) fprintf(f, "%c", j); - else fprintf(f, "\\x%02x", j); - } - i = j; - } - } - ccode += 32 / sizeof(PCRE2_UCHAR); - } + const uint8_t *map = (const uint8_t *)ccode; + /* The first 6 ASCII characters (SOH...ACK) are totally, utterly useless. + If they're set in the bitmap, then it's clearly been formed by negation.*/ + BOOL print_negated = (map[0] & 0x7e) == 0x7e; + + fprintf(f, " bitmap: [%s", print_negated? "^" : ""); + print_map(f, map, print_negated); + fprintf(f, "]\n"); + ccode += 32 / sizeof(PCRE2_UCHAR); } - - /* For an XCLASS there is always some additional data */ - - if (*code == OP_XCLASS) + else + fprintf(f, " no bitmap\n"); + while (ccode < code + extra) { - PCRE2_UCHAR ch; - while ((ch = *ccode++) != XCL_END) + if (print_lengths) + fprintf(f, "%3d ", (int)(ccode - codestart)); + else + fprintf(f, " "); + + switch (*ccode) { - const char *notch = ""; + case ECL_AND: + fprintf(f, " AND\n"); + ccode += 1; + break; + case ECL_OR: + fprintf(f, " OR\n"); + ccode += 1; + break; + case ECL_XOR: + fprintf(f, " XOR\n"); + ccode += 1; + break; + case ECL_NOT: + fprintf(f, " NOT\n"); + ccode += 1; + break; - switch(ch) - { - case XCL_NOTPROP: - notch = "^"; - /* Fall through */ - - case XCL_PROP: - { - unsigned int ptype = *ccode++; - unsigned int pvalue = *ccode++; - const char *s; - - switch(ptype) - { - case PT_PXGRAPH: - fprintf(f, "[:%sgraph:]", notch); - break; - - case PT_PXPRINT: - fprintf(f, "[:%sprint:]", notch); - break; - - case PT_PXPUNCT: - fprintf(f, "[:%spunct:]", 
notch); - break; - - case PT_PXXDIGIT: - fprintf(f, "[:%sxdigit:]", notch); - break; - - default: - s = get_ucpname(ptype, pvalue); - fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 'P':'p'), - toupper(s[0]), s+1); - break; - } - } - break; + case ECL_XCLASS: + print_class(f, OP_XCLASS, ccode+1, (uint8_t*)codestart, utf, + " xclass: ", "\n"); + ccode += GET(ccode, 1); + break; - default: - ccode += 1 + print_char(f, ccode, utf); - if (ch == XCL_RANGE) - { - fprintf(f, "-"); - ccode += 1 + print_char(f, ccode, utf); - } - break; - } + default: + fprintf(f, " UNEXPECTED\n"); + ccode += 1; + break; } } + fprintf(f, " ]"); + goto CLASS_REF_REPEAT; +#endif /* SUPPORT_WIDE_CHARS */ - /* Indicate a non-UTF class which was created by negation */ - - fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); + case OP_CLASS: + case OP_NCLASS: +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + if (*code == OP_XCLASS) + extra = GET(code, 1); +#endif + print_class(f, *code, code+1, (uint8_t*)codestart, utf, " ", ""); + ccode = code + OP_lengths[*code] + extra; /* Handle repeats after a class or a back reference */ diff --git a/src/pcre2_serialize.c b/src/pcre2_serialize.c index ba17a26..a10e302 100644 --- a/src/pcre2_serialize.c +++ b/src/pcre2_serialize.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -127,25 +127,25 @@ dst_bytes += TABLES_LENGTH; for (i = 0; i < number_of_codes; i++) { re = (const pcre2_real_code *)(codes[i]); - (void)memcpy(dst_bytes, (char *)re, re->blocksize); - - /* Certain fields in the compiled code block are re-set during - deserialization. 
In order to ensure that the serialized data stream is always - the same for the same pattern, set them to zero here. We can't assume the - copy of the pattern is correctly aligned for accessing the fields as part of + (void)memcpy(dst_bytes, (const char *)re, re->blocksize); + + /* Certain fields in the compiled code block are re-set during + deserialization. In order to ensure that the serialized data stream is always + the same for the same pattern, set them to zero here. We can't assume the + copy of the pattern is correctly aligned for accessing the fields as part of a structure. Note the use of sizeof(void *) in the second of these, to - specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a - pointer to uint8_t), gcc gives a warning because the first argument is also a - pointer to uint8_t. Casting the first argument to (void *) can stop this, but + specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a + pointer to uint8_t), gcc gives a warning because the first argument is also a + pointer to uint8_t. Casting the first argument to (void *) can stop this, but it didn't stop Coverity giving the same complaint. 
*/ - - (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, + + (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, sizeof(pcre2_memctl)); - (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, + (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, sizeof(void *)); (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0, - sizeof(void *)); - + sizeof(void *)); + dst_bytes += re->blocksize; } @@ -232,10 +232,10 @@ for (i = 0; i < number_of_codes; i++) if (dst_re->magic_number != MAGIC_NUMBER || dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 || dst_re->name_count > MAX_NAME_COUNT) - { - memctl->free(dst_re, memctl->memory_data); + { + memctl->free(dst_re, memctl->memory_data); return PCRE2_ERROR_BADSERIALIZEDDATA; - } + } /* At the moment only one table is supported. */ diff --git a/src/pcre2_study.c b/src/pcre2_study.c index 792e696..85764ce 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -114,7 +114,7 @@ uint32_t once_fudge = 0; BOOL had_recurse = FALSE; BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0; PCRE2_SPTR nextbranch = code + GET(code, 1); -PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; +PCRE2_SPTR cc = code + 1 + LINK_SIZE; recurse_check this_recurse; /* If this is a "could be empty" group, its minimum length is 0. */ @@ -136,12 +136,13 @@ passes 16-bits, reset to that value and skip the rest of the branch. 
*/ for (;;) { int d, min, recno; - PCRE2_UCHAR op, *cs, *ce; + PCRE2_UCHAR op; + PCRE2_SPTR cs, ce; if (branchlength >= UINT16_MAX) { branchlength = UINT16_MAX; - cc = (PCRE2_UCHAR *)nextbranch; + cc = nextbranch; } op = *cc; @@ -249,6 +250,7 @@ for (;;) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: + case OP_ASSERT_SCS: case OP_ASSERTBACK_NA: do cc += GET(cc, 1); while (*cc == OP_ALT); /* Fall through */ @@ -417,15 +419,14 @@ for (;;) case OP_NCLASS: #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: + case OP_ECLASS: /* The original code caused an unsigned overflow in 64 bit systems, so now we use a conditional statement. */ - if (op == OP_XCLASS) + if (op == OP_XCLASS || op == OP_ECLASS) cc += GET(cc, 1); else - cc += PRIV(OP_lengths)[OP_CLASS]; -#else - cc += PRIV(OP_lengths)[OP_CLASS]; #endif + cc += PRIV(OP_lengths)[OP_CLASS]; switch (*cc) { @@ -479,8 +480,8 @@ for (;;) if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) { int count = GET2(cc, 1+IMM2_SIZE); - PCRE2_UCHAR *slot = - (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + + PCRE2_SPTR slot = + (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) + GET2(cc, 1) * re->name_entry_size; d = INT_MAX; @@ -496,13 +497,12 @@ for (;;) dd = backref_cache[recno]; else { - ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno); + ce = cs = PRIV(find_bracket)(startcode, utf, recno); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); dd = 0; - if (!dupcapused || - (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL) + if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL) { if (cc > cs && cc < ce) /* Simple recursion */ { @@ -539,7 +539,7 @@ for (;;) } } else d = 0; - cc += 1 + 2*IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; goto REPEAT_BACK_REFERENCE; /* Single back reference by number. 
References by name are converted to by @@ -557,12 +557,11 @@ for (;;) if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) { - ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno); + ce = cs = PRIV(find_bracket)(startcode, utf, recno); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); - if (!dupcapused || - (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL) + if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL) { if (cc > cs && cc < ce) /* Simple recursion */ { @@ -593,7 +592,7 @@ for (;;) backref_cache[0] = recno; } - cc += 1 + IMM2_SIZE; + cc += PRIV(OP_lengths)[*cc]; /* Handle repeated back references */ @@ -643,7 +642,7 @@ for (;;) pattern contains multiple subpatterns with the same number. */ case OP_RECURSE: - cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1); + cs = ce = startcode + GET(cc, 1); recno = GET2(cs, 1+LINK_SIZE); if (recno == prev_recurse_recno) { @@ -755,10 +754,13 @@ for (;;) new ones get added they are properly considered. */ default: + PCRE2_DEBUG_UNREACHABLE(); return -3; } } -/* Control never gets here */ + +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ +return -3; /* Avoid compiler warnings */ } @@ -919,6 +921,138 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +/************************************************* +* Set starting bits for a character list. * +*************************************************/ + +/* This function sets starting bits for a character list. It enumerates +all characters and character ranges in the character list, and sets +the starting bits accordingly. 
+ +Arguments: + code pointer to the code + start_bitmap pointer to the starting bitmap + +Returns: nothing +*/ +static void +study_char_list(PCRE2_SPTR code, uint8_t *start_bitmap, + const uint8_t *char_lists_end) +{ +uint32_t type, list_ind; +uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD; +uint32_t range_start = ~(uint32_t)0, range_end = 0; +const uint8_t *next_char; +PCRE2_UCHAR start_buffer[6], end_buffer[6]; +PCRE2_UCHAR start, end; + +/* Only needed in 8-bit mode at the moment. */ +type = (uint32_t)(code[0] << 8) | code[1]; +code += 2; + +/* Align characters. */ +next_char = char_lists_end - (GET(code, 0) << 1); +type &= XCL_TYPE_MASK; +list_ind = 0; + +if ((type & XCL_BEGIN_WITH_RANGE) != 0) + range_start = XCL_CHAR_LIST_LOW_16_START; + +while (type > 0) + { + uint32_t item_count = type & XCL_ITEM_COUNT_MASK; + + if (item_count == XCL_ITEM_COUNT_MASK) + { + if (list_ind <= 1) + { + item_count = *(const uint16_t*)next_char; + next_char += 2; + } + else + { + item_count = *(const uint32_t*)next_char; + next_char += 4; + } + } + + while (item_count > 0) + { + if (list_ind <= 1) + { + range_end = *(const uint16_t*)next_char; + next_char += 2; + } + else + { + range_end = *(const uint32_t*)next_char; + next_char += 4; + } + + if ((range_end & XCL_CHAR_END) != 0) + { + range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + PRIV(ord2utf)(range_end, end_buffer); + end = end_buffer[0]; + + if (range_start < range_end) + { + PRIV(ord2utf)(range_start, start_buffer); + for (start = start_buffer[0]; start <= end; start++) + start_bitmap[start / 8] |= (1u << (start & 7)); + } + else + start_bitmap[end / 8] |= (1u << (end & 7)); + + range_start = ~(uint32_t)0; + } + else + range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT); + + item_count--; + } + + list_ind++; + type >>= XCL_TYPE_BIT_LEN; + + if (range_start == ~(uint32_t)0) + { + if ((type & XCL_BEGIN_WITH_RANGE) != 0) + { + /* In 8 bit mode XCL_CHAR_LIST_HIGH_32_START is not possible. 
*/ + if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START; + else range_start = XCL_CHAR_LIST_LOW_32_START; + } + } + else if ((type & XCL_BEGIN_WITH_RANGE) == 0) + { + PRIV(ord2utf)(range_start, start_buffer); + + /* In 8 bit mode XCL_CHAR_LIST_LOW_32_END and + XCL_CHAR_LIST_HIGH_32_END are not possible. */ + if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END; + else range_end = XCL_CHAR_LIST_HIGH_16_END; + + PRIV(ord2utf)(range_end, end_buffer); + end = end_buffer[0]; + + for (start = start_buffer[0]; start <= end; start++) + start_bitmap[start / 8] |= (1u << (start & 7)); + + range_start = ~(uint32_t)0; + } + + /* In 8 bit mode XCL_CHAR_LIST_HIGH_32_ADD is not possible. */ + if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD; + else char_list_add = XCL_CHAR_LIST_LOW_32_ADD; + } +} +#endif + + + /************************************************* * Create bitmap of starting code units * *************************************************/ @@ -980,7 +1114,7 @@ do { int rc; PCRE2_SPTR ncode; - uint8_t *classmap = NULL; + const uint8_t *classmap = NULL; #ifdef SUPPORT_WIDE_CHARS PCRE2_UCHAR xclassflags; #endif @@ -1134,6 +1268,7 @@ do case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: ncode += GET(ncode, 1); while (*ncode == OP_ALT) ncode += GET(ncode, 1); ncode += 1 + LINK_SIZE; @@ -1252,12 +1387,14 @@ do tcode += GET(tcode, 1 + 2*LINK_SIZE); break; - /* Skip over lookbehind and negative lookahead assertions */ + /* Skip over lookbehind, negative lookahead, and scan substring + assertions */ case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NA: + case OP_ASSERT_SCS: do tcode += GET(tcode, 1); while (*tcode == OP_ALT); tcode += 1 + LINK_SIZE; break; @@ -1578,6 +1715,13 @@ do tcode += 2; break; + /* Set-based ECLASS: treat it the same as a "complex" XCLASS; give up. 
*/ + +#ifdef SUPPORT_WIDE_CHARS + case OP_ECLASS: + return SSB_FAIL; +#endif + /* Extended class: if there are any property checks, or if this is a negative XCLASS without a map, give up. If there are no property checks, there must be wide characters on the XCLASS list, because otherwise an @@ -1596,7 +1740,7 @@ do map pointer if there is one, and fall through. */ classmap = ((xclassflags & XCL_MAP) == 0)? NULL : - (uint8_t *)(tcode + 1 + LINK_SIZE + 1); + (const uint8_t *)(tcode + 1 + LINK_SIZE + 1); /* In UTF-8 mode, scan the character list and set bits for leading bytes, then jump to handle the map. */ @@ -1608,6 +1752,13 @@ do PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32); tcode += GET(tcode, 1); + if (*p >= XCL_LIST) + { + study_char_list(p, re->start_bitmap, + ((const uint8_t *)re + re->code_start)); + goto HANDLE_CLASSMAP; + } + for (;;) switch (*p++) { case XCL_SINGLE: @@ -1629,6 +1780,7 @@ do goto HANDLE_CLASSMAP; default: + PCRE2_DEBUG_UNREACHABLE(); return SSB_UNKNOWN; /* Internal error, should not occur */ } } @@ -1665,7 +1817,7 @@ do case OP_CLASS: if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else { - classmap = (uint8_t *)(++tcode); + classmap = (const uint8_t *)(++tcode); tcode += 32 / sizeof(PCRE2_UCHAR); } @@ -1768,8 +1920,7 @@ BOOL ucp = (re->overall_options & PCRE2_UCP) != 0; /* Find start of compiled code */ -code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + - re->name_entry_size * re->name_count; +code = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start); /* For a pattern that has a first code unit, or a multiline pattern that matches only at "line start", there is no point in seeking a list of starting @@ -1779,7 +1930,11 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) { int depth = 0; int rc = set_start_bits(re, code, utf, ucp, &depth); - if (rc == SSB_UNKNOWN) return 1; + if (rc == SSB_UNKNOWN) + { + PCRE2_DEBUG_UNREACHABLE(); + return 1; + } /* If a list of starting code units was set up, 
scan the list to see if only one or two were listed. Having only one listed is rare because usually a @@ -1852,25 +2007,22 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) } } - /* Replace the start code unit bits with a first code unit, but only if it - is not the same as a required later code unit. This is because a search for - a required code unit starts after an explicit first code unit, but at a - code unit found from the bitmap. Patterns such as /a*a/ don't work - if both the start unit and required unit are the same. */ - - if (a >= 0 && - ( - (re->flags & PCRE2_LASTSET) == 0 || - ( - re->last_codeunit != (uint32_t)a && - (b < 0 || re->last_codeunit != (uint32_t)b) - ) - )) - { + /* Replace the start code unit bits with a first code unit. If it is the + same as a required later code unit, then clear the required later code + unit. This is because a search for a required code unit starts after an + explicit first code unit, but at a code unit found from the bitmap. + Patterns such as /a*a/ don't work if both the start unit and required + unit are the same. */ + + if (a >= 0) { + if ((re->flags & PCRE2_LASTSET) && (re->last_codeunit == (uint32_t)a || (b >= 0 && re->last_codeunit == (uint32_t)b))) { + re->flags &= ~(PCRE2_LASTSET | PCRE2_LASTCASELESS); + re->last_codeunit = 0; + } re->first_codeunit = a; flags = PCRE2_FIRSTSET; if (b >= 0) flags |= PCRE2_FIRSTCASELESS; - } + } DONE: re->flags |= flags; @@ -1898,9 +2050,11 @@ if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 && break; /* Leave minlength unchanged (will be zero) */ case -2: + PCRE2_DEBUG_UNREACHABLE(); return 2; /* missing capturing bracket */ case -3: + PCRE2_DEBUG_UNREACHABLE(); return 3; /* unrecognized opcode */ default: diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c index edbb78c..17040ce 100644 --- a/src/pcre2_substitute.c +++ b/src/pcre2_substitute.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -130,17 +130,21 @@ for (; ptr < ptrend; ptr++) ptr += 1; /* Must point after \ */ erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, - code->overall_options, code->extra_options, FALSE, NULL); + code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL); ptr -= 1; /* Back to last code unit of escape */ if (errorcode != 0) { - rc = errorcode; + /* errorcode from check_escape is positive, so must not be returned by + pcre2_substitute(). */ + rc = PCRE2_ERROR_BADREPESCAPE; goto EXIT; } switch(erc) { case 0: /* Data character */ + case ESC_b: /* Data character */ + case ESC_v: /* Data character */ case ESC_E: /* Isolated \E is ignored */ break; @@ -148,7 +152,18 @@ for (; ptr < ptrend; ptr++) literal = TRUE; break; + case ESC_g: + /* The \g<name> form (\g<number> already handled by check_escape) + + Don't worry about finding the matching ">". We are super, super lenient + about validating ${} replacements inside find_text_end(), so we certainly + don't need to worry about other syntax. Importantly, a \g<..> or $<...> + sequence can't contain a '}' character. */ + break; + default: + if (erc < 0) + break; /* capture group reference */ rc = PCRE2_ERROR_BADREPESCAPE; goto EXIT; } @@ -163,6 +178,426 @@ return rc; } +/************************************************* +* Validate group name * +*************************************************/ + +/* This function scans for a capture group name, validating it +consists of legal characters, is not empty, and does not exceed +MAX_NAME_SIZE.
+ +Arguments: + ptrptr points to the pointer to the start of the text (updated) + ptrend end of the whole string + utf true if the input is UTF-encoded + ctypes pointer to the character types table + +Returns: TRUE if a name was read + FALSE otherwise +*/ + +static BOOL +read_name_subst(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, BOOL utf, + const uint8_t* ctypes) +{ +PCRE2_SPTR ptr = *ptrptr; +PCRE2_SPTR nameptr = ptr; + +if (ptr >= ptrend) /* No characters in name */ + goto FAILED; + +/* We do not need to check whether the name starts with a non-digit. +We are simply referencing names here, not defining them. */ + +/* See read_name in the pcre2_compile.c for the corresponding logic +restricting group names inside the pattern itself. */ + +#ifdef SUPPORT_UNICODE +if (utf) + { + uint32_t c, type; + + while (ptr < ptrend) + { + GETCHAR(c, ptr); + type = UCD_CHARTYPE(c); + if (type != ucp_Nd && PRIV(ucp_gentype)[type] != ucp_L && + c != CHAR_UNDERSCORE) break; + ptr++; + FORWARDCHARTEST(ptr, ptrend); + } + } +else +#else +(void)utf; /* Avoid compiler warning */ +#endif /* SUPPORT_UNICODE */ + +/* Handle group names in non-UTF modes. */ + + { + while (ptr < ptrend && MAX_255(*ptr) && (ctypes[*ptr] & ctype_word) != 0) + { + ptr++; + } + } + +/* Check name length */ + +if (ptr - nameptr > MAX_NAME_SIZE) + goto FAILED; + +/* Subpattern names must not be empty */ +if (ptr == nameptr) + goto FAILED; + +*ptrptr = ptr; +return TRUE; + +FAILED: +*ptrptr = ptr; +return FALSE; +} + + +/************************************************* +* Case transformations * +*************************************************/ + +#define PCRE2_SUBSTITUTE_CASE_NONE 0 +// 1, 2, 3 are PCRE2_SUBSTITUTE_CASE_LOWER, UPPER, TITLE_FIRST. +#define PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST 4 + +typedef struct { + int to_case; /* One of PCRE2_SUBSTITUTE_CASE_xyz */ + BOOL single_char; +} case_state; + +/* Helper to guess how much a string is likely to increase in size when +case-transformed. 
Usually, strings don't change size at all, but some rare +characters do grow. Estimate +10%, plus another few characters. + +Performing this estimation is unfortunate, but inevitable, since we can't call +the callout if we ran out of buffer space to prepare its input. + +Because this estimate is inexact (and in pathological cases, underestimates the +required buffer size) we must document that when you have a +substitute_case_callout, and you are using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, you +may need more than two calls to determine the final buffer size. */ + +static PCRE2_SIZE +pessimistic_case_inflation(PCRE2_SIZE len) +{ +return (len >> 3u) + 10; +} + +/* Case transformation behaviour if no callout is passed. */ + +static PCRE2_SIZE +default_substitute_case_callout( + PCRE2_SPTR input, PCRE2_SIZE input_len, + PCRE2_UCHAR *output, PCRE2_SIZE output_cap, + case_state *state, const pcre2_code *code) +{ +PCRE2_SPTR input_end = input + input_len; +#ifdef SUPPORT_UNICODE +BOOL utf; +BOOL ucp; +#endif +PCRE2_UCHAR temp[6]; +BOOL next_to_upper; +BOOL rest_to_upper; +BOOL single_char; +BOOL overflow = FALSE; +PCRE2_SIZE written = 0; + +/* Helpful simplifying invariant: input and output are disjoint buffers. +I believe that this code is technically undefined behaviour, because the two +pointers input/output are "unrelated" pointers and hence not comparable. Casting +via char* bypasses some but not all of those technical rules. It is not included +in release builds, in any case. 
*/ +PCRE2_ASSERT((char *)(input + input_len) <= (char *)output || + (char *)(output + output_cap) <= (char *)input); + +#ifdef SUPPORT_UNICODE +utf = (code->overall_options & PCRE2_UTF) != 0; +ucp = (code->overall_options & PCRE2_UCP) != 0; +#endif + +if (input_len == 0) return 0; + +switch (state->to_case) + { + default: + PCRE2_DEBUG_UNREACHABLE(); + return 0; + + case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE + case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE + next_to_upper = rest_to_upper = (state->to_case == PCRE2_SUBSTITUTE_CASE_UPPER); + break; + + case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE + next_to_upper = TRUE; + rest_to_upper = FALSE; + state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + break; + + case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE + next_to_upper = FALSE; + rest_to_upper = TRUE; + state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + break; + } + +single_char = state->single_char; +if (single_char) + state->to_case = PCRE2_SUBSTITUTE_CASE_NONE; + +while (input < input_end) + { + uint32_t ch; + unsigned int chlen; + + GETCHARINCTEST(ch, input); + +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && ch >= 128) + { + uint32_t type = UCD_CHARTYPE(ch); + if (PRIV(ucp_gentype)[type] == ucp_L && + type != (next_to_upper? ucp_Lu : ucp_Ll)) + ch = UCD_OTHERCASE(ch); + + /* TODO This is far from correct... it doesn't support the SpecialCasing.txt + mappings, but worse, it's not even correct for all the ordinary case + mappings. We should add support for those (at least), and then add the + SpecialCasing.txt mappings for Esszet and ligatures, and finally use the + Turkish casing flag on the match context. */ + } + else +#endif + if (MAX_255(ch)) + { + if (((code->tables + cbits_offset + + (next_to_upper? 
cbit_upper:cbit_lower) + )[ch/8] & (1u << (ch%8))) == 0) + ch = (code->tables + fcc_offset)[ch]; + } + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + + if (!overflow && chlen <= output_cap) + { + memcpy(output, temp, CU2BYTES(chlen)); + output += chlen; + output_cap -= chlen; + } + else + { + overflow = TRUE; + } + + if (chlen > ~(PCRE2_SIZE)0 - written) /* Integer overflow */ + return ~(PCRE2_SIZE)0; + written += chlen; + + next_to_upper = rest_to_upper; + + /* memcpy the remainder, if only transforming a single character. */ + + if (single_char) + { + PCRE2_SIZE rest_len = input_end - input; + + if (!overflow && rest_len <= output_cap) + memcpy(output, input, CU2BYTES(rest_len)); + + if (rest_len > ~(PCRE2_SIZE)0 - written) /* Integer overflow */ + return ~(PCRE2_SIZE)0; + written += rest_len; + + return written; + } + } + +return written; +} + +/* Helper to perform the call to the substitute_case_callout. We wrap the +user-provided callout because our internal arguments are slightly extended. We +don't want the user callout to handle the case of "\l" (first character only to +lowercase) or "\l\U" (first character to lowercase, rest to uppercase) because +those are not operations defined by Unicode. Instead the user callout simply +needs to provide the three Unicode primitives: lower, upper, titlecase. 
*/ + +static PCRE2_SIZE +do_case_copy( + PCRE2_UCHAR *input_output, PCRE2_SIZE input_len, PCRE2_SIZE output_cap, + case_state *state, BOOL utf, + PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *), + void *substitute_case_callout_data) +{ +PCRE2_SPTR input = input_output; +PCRE2_UCHAR *output = input_output; +PCRE2_SIZE rc; +PCRE2_SIZE rc2; +int ch1_to_case; +int rest_to_case; +PCRE2_UCHAR ch1[6]; +PCRE2_SIZE ch1_len; +PCRE2_SPTR rest; +PCRE2_SIZE rest_len; +BOOL ch1_overflow = FALSE; +BOOL rest_overflow = FALSE; + +#if PCRE2_CODE_UNIT_WIDTH == 32 || !defined(SUPPORT_UNICODE) +(void)utf; /* Avoid compiler warning. */ +#endif + +PCRE2_ASSERT(input_len != 0); + +switch (state->to_case) + { + default: + PCRE2_DEBUG_UNREACHABLE(); + return 0; + + case PCRE2_SUBSTITUTE_CASE_LOWER: // Can be single_char TRUE or FALSE + case PCRE2_SUBSTITUTE_CASE_UPPER: // Can only be single_char FALSE + case PCRE2_SUBSTITUTE_CASE_TITLE_FIRST: // Can be single_char TRUE or FALSE + + /* The easy case, where our internal casing operations align with those of + the callout. */ + + if (state->single_char == FALSE) + { + rc = substitute_case_callout(input, input_len, output, output_cap, + state->to_case, substitute_case_callout_data); + + if (state->to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST) + state->to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + + return rc; + } + + ch1_to_case = state->to_case; + rest_to_case = PCRE2_SUBSTITUTE_CASE_NONE; + break; + + case PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST: // Can only be single_char FALSE + ch1_to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + rest_to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + break; + } + +/* Identify the leading character. Take copy, because its storage overlaps with +`output`, and hence may be scrambled by the callout. 
*/ + + { + PCRE2_SPTR ch_end = input; + uint32_t ch; + + GETCHARINCTEST(ch, ch_end); + (void) ch; + PCRE2_ASSERT(ch_end <= input + input_len && ch_end - input <= 6); + ch1_len = ch_end - input; + memcpy(ch1, input, CU2BYTES(ch1_len)); + } + +rest = input + ch1_len; +rest_len = input_len - ch1_len; + +/* Transform just ch1. The buffers are always in-place (input == output). With a +custom callout, we need a loop to discover its required buffer size. The loop +wouldn't be required if the callout were well-behaved, but it might be naughty +and return "5" the first time, then "10" the next time we call it using the +exact same input! */ + + { + PCRE2_SIZE ch1_cap; + PCRE2_SIZE max_ch1_cap; + + ch1_cap = ch1_len; /* First attempt uses the space vacated by ch1. */ + PCRE2_ASSERT(output_cap >= input_len && input_len >= rest_len); + max_ch1_cap = output_cap - rest_len; + + while (TRUE) + { + rc = substitute_case_callout(ch1, ch1_len, output, ch1_cap, ch1_to_case, + substitute_case_callout_data); + if (rc == ~(PCRE2_SIZE)0) return rc; + + if (rc <= ch1_cap) break; + + if (rc > max_ch1_cap) + { + ch1_overflow = TRUE; + break; + } + + /* Move the rest to the right, to make room for expanding ch1. */ + + memmove(input_output + rc, rest, CU2BYTES(rest_len)); + rest = input + rc; + + ch1_cap = rc; + + /* Proof of loop termination: `ch1_cap` is growing on each iteration, but + the loop ends if `rc` reaches the (unchanging) upper bound of output_cap. */ + } + } + +if (rest_to_case == PCRE2_SUBSTITUTE_CASE_NONE) + { + if (!ch1_overflow) + { + PCRE2_ASSERT(rest_len <= output_cap - rc); + memmove(output + rc, rest, CU2BYTES(rest_len)); + } + rc2 = rest_len; + + state->to_case = PCRE2_SUBSTITUTE_CASE_NONE; + } +else + { + PCRE2_UCHAR dummy[1]; + + rc2 = substitute_case_callout(rest, rest_len, + ch1_overflow? dummy : output + rc, + ch1_overflow? 
0u : output_cap - rc, + rest_to_case, substitute_case_callout_data); + if (rc2 == ~(PCRE2_SIZE)0) return rc2; + + if (!ch1_overflow && rc2 > output_cap - rc) rest_overflow = TRUE; + + /* If ch1 grows so that `xform(ch1)+rest` can't fit in the buffer, but then + `rest` shrinks, it's actually possible for the total calculated length of + `xform(ch1)+xform(rest)` to come out at less than output_cap. But we can't + report that, because it would make it seem that the operation succeeded. + If either of xform(ch1) or xform(rest) won't fit in the buffer, our final + result must be > output_cap. */ + if (ch1_overflow && rc2 < rest_len) + rc2 = rest_len; + + state->to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + } + +if (rc2 > ~(PCRE2_SIZE)0 - rc) /* Integer overflow */ + return ~(PCRE2_SIZE)0; + +PCRE2_ASSERT(!(ch1_overflow || rest_overflow) || rc + rc2 > output_cap); +(void)rest_overflow; + +return rc + rc2; +} + /************************************************* * Match and substitute * @@ -194,25 +629,107 @@ Returns: >= 0 number of substitutions made overflow, either give an error immediately, or keep on, accumulating the length. 
*/ -#define CHECKMEMCPY(from,length) \ - { \ - if (!overflowed && lengthleft < length) \ - { \ - if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ - overflowed = TRUE; \ - extra_needed = length - lengthleft; \ - } \ - else if (overflowed) \ - { \ - extra_needed += length; \ - } \ - else \ - { \ - memcpy(buffer + buff_offset, from, CU2BYTES(length)); \ - buff_offset += length; \ - lengthleft -= length; \ - } \ - } +#define CHECKMEMCPY(from, length_) \ + do { \ + PCRE2_SIZE chkmc_length = length_; \ + if (overflowed) \ + { \ + if (chkmc_length > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \ + goto TOOLARGEREPLACE; \ + extra_needed += chkmc_length; \ + } \ + else if (lengthleft < chkmc_length) \ + { \ + if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ + overflowed = TRUE; \ + extra_needed = chkmc_length - lengthleft; \ + } \ + else \ + { \ + memcpy(buffer + buff_offset, from, CU2BYTES(chkmc_length)); \ + buff_offset += chkmc_length; \ + lengthleft -= chkmc_length; \ + } \ + } \ + while (0) + +/* This macro checks for space and copies characters with casing modifications. +On overflow, it behaves as for CHECKMEMCPY(). + +When substitute_case_callout is NULL, the source and destination buffers must +not overlap, because our default handler does not support this. */ + +#define CHECKCASECPY_BASE(length_, do_call) \ + do { \ + PCRE2_SIZE chkcc_length = (PCRE2_SIZE)(length_); \ + PCRE2_SIZE chkcc_rc; \ + do_call \ + if (lengthleft < chkcc_rc) \ + { \ + if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ + overflowed = TRUE; \ + extra_needed = chkcc_rc - lengthleft; \ + } \ + else \ + { \ + buff_offset += chkcc_rc; \ + lengthleft -= chkcc_rc; \ + } \ + } \ + while (0) + +#define CHECKCASECPY_DEFAULT(from, length_) \ + CHECKCASECPY_BASE(length_, { \ + chkcc_rc = default_substitute_case_callout(from, chkcc_length, \ + buffer + buff_offset, \ + overflowed? 
0 : lengthleft, \ + &forcecase, code); \ + if (overflowed) \ + { \ + if (chkcc_rc > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \ + goto TOOLARGEREPLACE; \ + extra_needed += chkcc_rc; \ + break; \ + } \ + }) + +#define CHECKCASECPY_CALLOUT(length_) \ + CHECKCASECPY_BASE(length_, { \ + chkcc_rc = do_case_copy(buffer + buff_offset, chkcc_length, \ + lengthleft, &forcecase, utf, \ + substitute_case_callout, \ + substitute_case_callout_data); \ + if (chkcc_rc == ~(PCRE2_SIZE)0) goto CASEERROR; \ + }) + +/* This macro does a delayed case transformation, for the situation when we have +a case-forcing callout. */ + +#define DELAYEDFORCECASE() \ + do { \ + PCRE2_SIZE chars_outstanding = (buff_offset - casestart_offset) + \ + (extra_needed - casestart_extra_needed); \ + if (chars_outstanding > 0) \ + { \ + if (overflowed) \ + { \ + PCRE2_SIZE guess = pessimistic_case_inflation(chars_outstanding); \ + if (guess > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ \ + goto TOOLARGEREPLACE; \ + extra_needed += guess; \ + } \ + else \ + { \ + /* Rewind the buffer */ \ + lengthleft += (buff_offset - casestart_offset); \ + buff_offset = casestart_offset; \ + /* Care! 
In-place case transformation */ \ + CHECKCASECPY_CALLOUT(chars_outstanding); \ + } \ + } \ + } \ + while (0) + /* Here's the function */ @@ -224,8 +741,6 @@ pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, { int rc; int subs; -int forcecase = 0; -int forcecasereset = 0; uint32_t ovector_count; uint32_t goptions = 0; uint32_t suboptions; @@ -234,18 +749,19 @@ BOOL escaped_literal = FALSE; BOOL overflowed = FALSE; BOOL use_existing_match; BOOL replacement_only; -#ifdef SUPPORT_UNICODE BOOL utf = (code->overall_options & PCRE2_UTF) != 0; -BOOL ucp = (code->overall_options & PCRE2_UCP) != 0; -#endif PCRE2_UCHAR temp[6]; PCRE2_SPTR ptr; -PCRE2_SPTR repend; +PCRE2_SPTR repend = NULL; PCRE2_SIZE extra_needed = 0; PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; PCRE2_SIZE *ovector; PCRE2_SIZE ovecsave[3]; pcre2_substitute_callout_block scb; +PCRE2_SIZE sub_start_extra_needed; +PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, + PCRE2_SIZE, int, void *) = NULL; +void *substitute_case_callout_data = NULL; /* General initialization */ @@ -254,6 +770,12 @@ lengthleft = buff_length = *blength; *blength = PCRE2_UNSET; ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; +if (mcontext != NULL) + { + substitute_case_callout = mcontext->substitute_case_callout; + substitute_case_callout_data = mcontext->substitute_case_callout_data; + } + /* Partial matching is not valid. This must come after setting *blength to PCRE2_UNSET, so as not to imply an offset in the replacement. */ @@ -286,27 +808,34 @@ case, we copy the existing match into the internal block, except for any cached heap frame size and pointer. This ensures that no changes are made to the external match data block. */ +/* WARNING: In both cases below a general context is constructed "by hand" +because calling pcre2_general_context_create() involves a memory allocation. 
If +the contents of a general context control block are ever changed there will +have to be changes below. */ + if (match_data == NULL) { - pcre2_general_context *gcontext; + pcre2_general_context gcontext; if (use_existing_match) return PCRE2_ERROR_NULL; - gcontext = (mcontext == NULL)? - (pcre2_general_context *)code : - (pcre2_general_context *)mcontext; + gcontext.memctl = (mcontext == NULL)? + ((const pcre2_real_code *)code)->memctl : + ((pcre2_real_match_context *)mcontext)->memctl; match_data = internal_match_data = - pcre2_match_data_create_from_pattern(code, gcontext); + pcre2_match_data_create_from_pattern(code, &gcontext); if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; } else if (use_existing_match) { - pcre2_general_context *gcontext = (mcontext == NULL)? - (pcre2_general_context *)code : - (pcre2_general_context *)mcontext; - int pairs = (code->top_bracket + 1 < match_data->oveccount)? + int pairs; + pcre2_general_context gcontext; + gcontext.memctl = (mcontext == NULL)? + ((const pcre2_real_code *)code)->memctl : + ((pcre2_real_match_context *)mcontext)->memctl; + pairs = (code->top_bracket + 1 < match_data->oveccount)? 
code->top_bracket + 1 : match_data->oveccount; internal_match_data = pcre2_match_data_create(match_data->oveccount, - gcontext); + &gcontext); if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector) + 2*pairs*sizeof(PCRE2_SIZE)); @@ -380,6 +909,9 @@ do { PCRE2_SPTR ptrstack[PTR_STACK_SIZE]; uint32_t ptrstackptr = 0; + case_state forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE }; + PCRE2_SIZE casestart_offset = 0; + PCRE2_SIZE casestart_extra_needed = 0; if (use_existing_match) { @@ -412,8 +944,9 @@ do save_start = start_offset++; if (subject[start_offset-1] == CHAR_CR && - code->newline_convention != PCRE2_NEWLINE_CR && - code->newline_convention != PCRE2_NEWLINE_LF && + (code->newline_convention == PCRE2_NEWLINE_CRLF || + code->newline_convention == PCRE2_NEWLINE_ANY || + code->newline_convention == PCRE2_NEWLINE_ANYCRLF) && start_offset < length && subject[start_offset] == CHAR_LF) start_offset++; @@ -480,14 +1013,16 @@ do } subs++; - /* Copy the text leading up to the match (unless not required), and remember - where the insert begins and how many ovector pairs are set. */ + /* Copy the text leading up to the match (unless not required); remember + where the insert begins and how many ovector pairs are set; and remember how + much space we have requested in extra_needed. */ if (rc == 0) rc = ovector_count; fraglength = ovector[0] - start_offset; if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength); scb.output_offsets[0] = buff_offset; scb.oveccount = rc; + sub_start_extra_needed = extra_needed; /* Process the replacement string. If the entire replacement is literal, just copy it with length check. 
*/ @@ -507,6 +1042,13 @@ do { uint32_t ch; unsigned int chlen; + int group; + uint32_t special; + PCRE2_SPTR text1_start = NULL; + PCRE2_SPTR text1_end = NULL; + PCRE2_SPTR text2_start = NULL; + PCRE2_SPTR text2_end = NULL; + PCRE2_UCHAR name[MAX_NAME_SIZE + 1]; /* If at the end of a nested substring, pop the stack. */ @@ -535,25 +1077,62 @@ do if (*ptr == CHAR_DOLLAR_SIGN) { - int group, n; - uint32_t special = 0; BOOL inparens; + BOOL inangle; BOOL star; PCRE2_SIZE sublength; - PCRE2_SPTR text1_start = NULL; - PCRE2_SPTR text1_end = NULL; - PCRE2_SPTR text2_start = NULL; - PCRE2_SPTR text2_end = NULL; PCRE2_UCHAR next; - PCRE2_UCHAR name[33]; + PCRE2_SPTR subptr, subptrend; if (++ptr >= repend) goto BAD; if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL; + special = 0; + text1_start = NULL; + text1_end = NULL; + text2_start = NULL; + text2_end = NULL; group = -1; - n = 0; inparens = FALSE; + inangle = FALSE; star = FALSE; + subptr = NULL; + subptrend = NULL; + + /* Special $ sequences, as supported by Perl, JavaScript, .NET and others. */ + if (next == CHAR_AMPERSAND) + { + ++ptr; + group = 0; + goto GROUP_SUBSTITUTE; + } + if (next == CHAR_GRAVE_ACCENT || next == CHAR_APOSTROPHE) + { + ++ptr; + rc = pcre2_substring_length_bynumber(match_data, 0, &sublength); + if (rc < 0) goto PTREXIT; /* (Sanity-check ovector before reading from it.) */ + + if (next == CHAR_GRAVE_ACCENT) + { + subptr = subject; + subptrend = subject + ovector[0]; + } + else + { + subptr = subject + ovector[1]; + subptrend = subject + length; + } + + goto SUBPTR_SUBSTITUTE; + } + if (next == CHAR_UNDERSCORE) + { + /* Java, .NET support $_ for "entire input string". */ + ++ptr; + subptr = subject; + subptrend = subject + length; + goto SUBPTR_SUBSTITUTE; + } if (next == CHAR_LEFT_CURLY_BRACKET) { @@ -561,22 +1140,31 @@ do next = *ptr; inparens = TRUE; } + else if (next == CHAR_LESS_THAN_SIGN) + { + /* JavaScript compatibility syntax, $<name>.
Processes only named + groups (not numbered) and does not support extensions such as star + (you can do ${name} and ${*name}, but not $<*name>). */ + if (++ptr >= repend) goto BAD; + next = *ptr; + inangle = TRUE; + } - if (next == CHAR_ASTERISK) + if (!inangle && next == CHAR_ASTERISK) { if (++ptr >= repend) goto BAD; next = *ptr; star = TRUE; } - if (!star && next >= CHAR_0 && next <= CHAR_9) + if (!star && !inangle && next >= CHAR_0 && next <= CHAR_9) { group = next - CHAR_0; while (++ptr < repend) { next = *ptr; if (next < CHAR_0 || next > CHAR_9) break; - group = group * 10 + next - CHAR_0; + group = group * 10 + (next - CHAR_0); /* A check for a number greater than the hightest captured group is sufficient here; no need for a separate overflow check. If unknown @@ -600,25 +1188,25 @@ do } else { - const uint8_t *ctypes = code->tables + ctypes_offset; - while (MAX_255(next) && (ctypes[next] & ctype_word) != 0) - { - name[n++] = next; - if (n > 32) goto BAD; - if (++ptr >= repend) break; - next = *ptr; - } - if (n == 0) goto BAD; - name[n] = 0; + PCRE2_SIZE name_len; + PCRE2_SPTR name_start = ptr; + if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset)) + goto BAD; + name_len = ptr - name_start; + memcpy(name, name_start, CU2BYTES(name_len)); + name[name_len] = 0; } + next = 0; /* not used or updated after this point */ + (void)next; + /* In extended mode we recognize ${name:+set text:unset text} and ${name:-default text}. */ if (inparens) { if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && - !star && ptr < repend - 2 && next == CHAR_COLON) + !star && ptr < repend - 2 && *ptr == CHAR_COLON) { special = *(++ptr); if (special != CHAR_PLUS && special != CHAR_MINUS) @@ -653,6 +1241,13 @@ do ptr++; } + if (inangle) + { + if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN) + goto BAD; + ptr++; + } + /* Have found a syntactically correct group number or name, or *name. Only *MARK is currently recognized. 
*/ @@ -663,10 +1258,14 @@ do PCRE2_SPTR mark = pcre2_get_mark(match_data); if (mark != NULL) { - PCRE2_SPTR mark_start = mark; - while (*mark != 0) mark++; - fraglength = mark - mark_start; - CHECKMEMCPY(mark_start, fraglength); + /* Peek backwards one code unit to obtain the length of the mark. + It can (theoretically) contain an embedded NUL. */ + fraglength = mark[-1]; + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(mark, fraglength); + else + CHECKMEMCPY(mark, fraglength); } } else goto BAD; @@ -677,8 +1276,7 @@ do else { - PCRE2_SPTR subptr, subptrend; - + GROUP_SUBSTITUTE: /* Find a number for a named group. In case there are duplicate names, search for the first one that is set. If the name is not found when PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a @@ -775,41 +1373,14 @@ do /* Substitute a literal string, possibly forcing alphabetic case. */ - while (subptr < subptrend) - { - GETCHARINCTEST(ch, subptr); - if (forcecase != 0) - { -#ifdef SUPPORT_UNICODE - if (utf || ucp) - { - uint32_t type = UCD_CHARTYPE(ch); - if (PRIV(ucp_gentype)[type] == ucp_L && - type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) - ch = UCD_OTHERCASE(ch); - } - else -#endif - { - if (((code->tables + cbits_offset + - ((forcecase > 0)? cbit_upper:cbit_lower) - )[ch/8] & (1u << (ch%8))) == 0) - ch = (code->tables + fcc_offset)[ch]; - } - forcecase = forcecasereset; - } - -#ifdef SUPPORT_UNICODE - if (utf) chlen = PRIV(ord2utf)(ch, temp); else -#endif - { - temp[0] = ch; - chlen = 1; - } - CHECKMEMCPY(temp, chlen); - } + SUBPTR_SUBSTITUTE: + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(subptr, subptrend - subptr); + else + CHECKMEMCPY(subptr, subptrend - subptr); } - } + } /* End of $ processing */ /* Handle an escape sequence in extended mode. 
We can use check_escape() to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but @@ -820,123 +1391,239 @@ do *ptr == CHAR_BACKSLASH) { int errorcode; + case_state new_forcecase = { PCRE2_SUBSTITUTE_CASE_NONE, FALSE }; if (ptr < repend - 1) switch (ptr[1]) { case CHAR_L: - forcecase = forcecasereset = -1; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + new_forcecase.single_char = FALSE; ptr += 2; - continue; + break; case CHAR_l: - forcecase = -1; - forcecasereset = 0; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_LOWER; + new_forcecase.single_char = TRUE; ptr += 2; - continue; + if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_U) + { + /* Perl reverse-title-casing feature for \l\U */ + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_REVERSE_TITLE_FIRST; + new_forcecase.single_char = FALSE; + ptr += 2; + } + break; case CHAR_U: - forcecase = forcecasereset = 1; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_UPPER; + new_forcecase.single_char = FALSE; ptr += 2; - continue; + break; case CHAR_u: - forcecase = 1; - forcecasereset = 0; + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST; + new_forcecase.single_char = TRUE; ptr += 2; - continue; + if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_L) + { + /* Perl title-casing feature for \u\L */ + new_forcecase.to_case = PCRE2_SUBSTITUTE_CASE_TITLE_FIRST; + new_forcecase.single_char = FALSE; + ptr += 2; + } + break; default: break; } + if (new_forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE) + { + SETFORCECASE: + + /* If the substitute_case_callout is unset, our case-forcing is done + immediately. If there is a callout however, then its action is delayed + until all the characters have been collected. + + Apply the callout now, before we set the new casing mode. 
*/ + + if (substitute_case_callout != NULL && + forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE) + DELAYEDFORCECASE(); + + forcecase = new_forcecase; + casestart_offset = buff_offset; + casestart_extra_needed = extra_needed; + continue; + } + ptr++; /* Point after \ */ rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, - code->overall_options, code->extra_options, FALSE, NULL); + code->overall_options, code->extra_options, code->top_bracket, FALSE, NULL); if (errorcode != 0) goto BADESCAPE; switch(rc) { case ESC_E: - forcecase = forcecasereset = 0; - continue; + goto SETFORCECASE; case ESC_Q: escaped_literal = TRUE; continue; case 0: /* Data character */ - goto LITERAL; + case ESC_b: /* \b is backspace in a substitution */ + case ESC_v: /* \v is vertical tab in a substitution */ + + if (rc == ESC_b) ch = CHAR_BS; + if (rc == ESC_v) ch = CHAR_VT; + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(temp, chlen); + else + CHECKMEMCPY(temp, chlen); + continue; + + case ESC_g: + { + PCRE2_SIZE name_len; + PCRE2_SPTR name_start; + + /* Parse the \g form (\g already handled by check_escape) */ + if (ptr >= repend || *ptr != CHAR_LESS_THAN_SIGN) + goto BADESCAPE; + ++ptr; + + name_start = ptr; + if (!read_name_subst(&ptr, repend, utf, code->tables + ctypes_offset)) + goto BADESCAPE; + name_len = ptr - name_start; + + if (ptr >= repend || *ptr != CHAR_GREATER_THAN_SIGN) + goto BADESCAPE; + ++ptr; + + special = 0; + group = -1; + memcpy(name, name_start, CU2BYTES(name_len)); + name[name_len] = 0; + goto GROUP_SUBSTITUTE; + } default: + if (rc < 0) + { + special = 0; + group = -rc - 1; + goto GROUP_SUBSTITUTE; + } goto BADESCAPE; } - } + } /* End of backslash processing */ /* Handle a literal code unit */ else { + PCRE2_SPTR ch_start; + LOADLITERAL: + ch_start = ptr; GETCHARINCTEST(ch, 
ptr); /* Get character value, increment pointer */ + (void) ch; - LITERAL: - if (forcecase != 0) - { -#ifdef SUPPORT_UNICODE - if (utf || ucp) - { - uint32_t type = UCD_CHARTYPE(ch); - if (PRIV(ucp_gentype)[type] == ucp_L && - type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) - ch = UCD_OTHERCASE(ch); - } - else -#endif - { - if (((code->tables + cbits_offset + - ((forcecase > 0)? cbit_upper:cbit_lower) - )[ch/8] & (1u << (ch%8))) == 0) - ch = (code->tables + fcc_offset)[ch]; - } - forcecase = forcecasereset; - } - -#ifdef SUPPORT_UNICODE - if (utf) chlen = PRIV(ord2utf)(ch, temp); else -#endif - { - temp[0] = ch; - chlen = 1; - } - CHECKMEMCPY(temp, chlen); + if (forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE && + substitute_case_callout == NULL) + CHECKCASECPY_DEFAULT(ch_start, ptr - ch_start); + else + CHECKMEMCPY(ch_start, ptr - ch_start); } /* End handling a literal code unit */ } /* End of loop for scanning the replacement. */ + /* If the substitute_case_callout is unset, our case-forcing is done + immediately. If there is a callout however, then its action is delayed + until all the characters have been collected. + + We now clean up any trailing section of the replacement for which we deferred + the case-forcing. */ + + if (substitute_case_callout != NULL && + forcecase.to_case != PCRE2_SUBSTITUTE_CASE_NONE) + DELAYEDFORCECASE(); + /* The replacement has been copied to the output, or its size has been - remembered. Do the callout if there is one and we have done an actual - replacement. */ + remembered. Handle the callout if there is one. */ - if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL) + if (mcontext != NULL && mcontext->substitute_callout != NULL) { - scb.subscount = subs; - scb.output_offsets[1] = buff_offset; - rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data); + /* If we did an actual (non-simulated) replacement, do the callout. */ - /* A non-zero return means cancel this substitution.
Instead, copy the - matched string fragment. */ + if (!overflowed) + { + scb.subscount = subs; + scb.output_offsets[1] = buff_offset; + rc = mcontext->substitute_callout(&scb, + mcontext->substitute_callout_data); - if (rc != 0) + /* A non-zero return means cancel this substitution. Instead, copy the + matched string fragment. */ + + if (rc != 0) + { + PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0]; + PCRE2_SIZE oldlength = ovector[1] - ovector[0]; + + buff_offset -= newlength; + lengthleft += newlength; + if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength); + + /* A negative return means do not do any more. */ + + if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL); + } + } + + /* In this interesting case, we cannot do the callout, so it's hard to + estimate the required buffer size. What callers want is to be able to make + two calls to pcre2_substitute(), once with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + to discover the buffer size, and then a second and final call. Older + versions of PCRE2 violated this assumption, by proceeding as if the callout + had returned zero - but on the second call to pcre2_substitute() it could + return non-zero and then overflow the buffer again. Callers probably don't + want to keep on looping to incrementally discover the buffer size. */ + + else { - PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0]; + PCRE2_SIZE newlength_buf = buff_offset - scb.output_offsets[0]; + PCRE2_SIZE newlength_extra = extra_needed - sub_start_extra_needed; + PCRE2_SIZE newlength = + (newlength_extra > ~(PCRE2_SIZE)0 - newlength_buf)?
/* Integer overflow */ + ~(PCRE2_SIZE)0 : newlength_buf + newlength_extra; /* Cap the addition */ PCRE2_SIZE oldlength = ovector[1] - ovector[0]; - buff_offset -= newlength; - lengthleft += newlength; - if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength); + /* Be pessimistic: request whichever buffer size is larger out of + accepting or rejecting the substitution. */ - /* A negative return means do not do any more. */ + if (oldlength > newlength) + { + PCRE2_SIZE additional = oldlength - newlength; + if (additional > ~(PCRE2_SIZE)0 - extra_needed) /* Integer overflow */ + goto TOOLARGEREPLACE; + extra_needed += additional; + } - if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL); + /* Proceed as if the callout did not return a negative. A negative + effectively rejects all future substitutions, but we want to examine them + pessimistically. */ } } @@ -973,6 +1660,9 @@ needed. Otherwise, an overflow generates an immediate error return. */ if (overflowed) { rc = PCRE2_ERROR_NOMEMORY; + + if (extra_needed > ~(PCRE2_SIZE)0 - buff_length) /* Integer overflow */ + goto TOOLARGEREPLACE; *blength = buff_length + extra_needed; } @@ -994,6 +1684,14 @@ NOROOM: rc = PCRE2_ERROR_NOMEMORY; goto EXIT; +CASEERROR: +rc = PCRE2_ERROR_REPLACECASE; +goto EXIT; + +TOOLARGEREPLACE: +rc = PCRE2_ERROR_TOOLARGEREPLACE; +goto EXIT; + BAD: rc = PCRE2_ERROR_BADREPLACEMENT; goto PTREXIT; diff --git a/src/pcre2_substring.c b/src/pcre2_substring.c index 14e919d..88afd23 100644 --- a/src/pcre2_substring.c +++ b/src/pcre2_substring.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -486,7 +486,7 @@ pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname, uint16_t bot = 0; uint16_t top = code->name_count; uint16_t entrysize = code->name_entry_size; -PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code)); +PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code)); while (top > bot) { diff --git a/src/pcre2_ucd.c b/src/pcre2_ucd.c index 97dbc8b..4c5e516 100644 --- a/src/pcre2_ucd.c +++ b/src/pcre2_ucd.c @@ -72,11 +72,13 @@ const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}}; const uint16_t PRIV(ucd_stage1)[] = {0}; const uint16_t PRIV(ucd_stage2)[] = {0}; const uint32_t PRIV(ucd_caseless_sets)[] = {0}; +const uint32_t PRIV(ucd_nocase_ranges)[] = {0}; +const uint32_t PRIV(ucd_nocase_ranges_size) = 0; #else -/* Total size: 112564 bytes, block size: 128. */ +/* Total size: 116564 bytes, block size: 128. */ -const char *PRIV(unicode_version) = "15.0.0"; +const char *PRIV(unicode_version) = "16.0.0"; /* When recompiling tables with a new Unicode version, please check the types in this structure definition with those in pcre2_internal.h (the actual field @@ -140,28 +142,87 @@ const uint32_t PRIV(ucd_caseless_sets)[] = { 0x004b, 0x006b, 0x212a, NOTACHAR, 0x00c5, 0x00e5, 0x212b, NOTACHAR, 0x1c88, 0xa64a, 0xa64b, NOTACHAR, + 0x0069, 0x0130, NOTACHAR, + 0x0049, 0x0131, NOTACHAR, }; +/* This is the index, within ucd_caseless_sets, of the additional +Turkish case-equivalences. The dotted I ones are this offset; the +dotless I are +3 from here. 
*/ + +const uint32_t PRIV(ucd_turkish_dotted_i_caseset) = 112; + /* When #included in pcre2test, we don't need the table of digit sets, nor the the large main UCD tables. */ #ifndef PCRE2_PCRE2TEST +/* This table contains character ranges, where the characters in the range have +no other case. Both start and end values are excluded from the range. */ + +const uint32_t PRIV(ucd_nocase_ranges)[] = { + 0x0000, 0x0041, /* 64 */ + 0x007a, 0x00b5, /* 58 */ + 0x00b5, 0x00c0, /* 10 */ + 0x0292, 0x029d, /* 10 */ + 0x029e, 0x0345, /* 166 */ + 0x0345, 0x0370, /* 42 */ + 0x0481, 0x048a, /* 8 */ + 0x0556, 0x0561, /* 10 */ + 0x0586, 0x10a0, /* 2841 */ + 0x10ff, 0x13a0, /* 672 */ + 0x13fd, 0x1c80, /* 2178 */ + 0x1cbf, 0x1d79, /* 185 */ + 0x1d7d, 0x1d8e, /* 16 */ + 0x1d8e, 0x1e00, /* 113 */ + 0x1ffc, 0x2126, /* 297 */ + 0x2132, 0x214e, /* 27 */ + 0x214e, 0x2160, /* 17 */ + 0x2184, 0x24b6, /* 817 */ + 0x24e9, 0x2c00, /* 1814 */ + 0x2cf3, 0x2d00, /* 12 */ + 0x2d2d, 0xa640, /* 30994 */ + 0xa66d, 0xa680, /* 18 */ + 0xa69b, 0xa722, /* 134 */ + 0xa76f, 0xa779, /* 9 */ + 0xa7dc, 0xa7f5, /* 24 */ + 0xa7f6, 0xab53, /* 860 */ + 0xab53, 0xab70, /* 28 */ + 0xabbf, 0xfb05, /* 20293 */ + 0xfb06, 0xff21, /* 1050 */ + 0xff5a, 0x10400, /* 1189 */ + 0x1044f, 0x104b0, /* 96 */ + 0x104fb, 0x10570, /* 116 */ + 0x105bc, 0x10c80, /* 1731 */ + 0x10cb2, 0x10cc0, /* 13 */ + 0x10cf2, 0x10d50, /* 93 */ + 0x10d65, 0x10d70, /* 10 */ + 0x10d85, 0x118a0, /* 2842 */ + 0x118df, 0x16e40, /* 21856 */ + 0x16e7f, 0x1e900, /* 31360 */ + 0x1e943, 0x110000, /* 988860 */ + 0xffffffff, 0xffffffff /* terminator */ +}; + +/* Total: 1110933 characters. */ +const uint32_t PRIV(ucd_nocase_ranges_size) = 80; + /* This table lists the code points for the '9' characters in each set of decimal digits. It is used to ensure that all the digits in a script run come from the same set. 
*/ const uint32_t PRIV(ucd_digit_sets)[] = { - 68, /* Number of subsequent values */ + 76, /* Number of subsequent values */ 0x00039, 0x00669, 0x006f9, 0x007c9, 0x0096f, 0x009ef, 0x00a6f, 0x00aef, 0x00b6f, 0x00bef, 0x00c6f, 0x00cef, 0x00d6f, 0x00def, 0x00e59, 0x00ed9, 0x00f29, 0x01049, 0x01099, 0x017e9, 0x01819, 0x0194f, 0x019d9, 0x01a89, 0x01a99, 0x01b59, 0x01bb9, 0x01c49, 0x01c59, 0x0a629, 0x0a8d9, 0x0a909, - 0x0a9d9, 0x0a9f9, 0x0aa59, 0x0abf9, 0x0ff19, 0x104a9, 0x10d39, 0x1106f, - 0x110f9, 0x1113f, 0x111d9, 0x112f9, 0x11459, 0x114d9, 0x11659, 0x116c9, - 0x11739, 0x118e9, 0x11959, 0x11c59, 0x11d59, 0x11da9, 0x11f59, 0x16a69, - 0x16ac9, 0x16b59, 0x1d7d7, 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x1e149, - 0x1e2f9, 0x1e4f9, 0x1e959, 0x1fbf9, + 0x0a9d9, 0x0a9f9, 0x0aa59, 0x0abf9, 0x0ff19, 0x104a9, 0x10d39, 0x10d49, + 0x1106f, 0x110f9, 0x1113f, 0x111d9, 0x112f9, 0x11459, 0x114d9, 0x11659, + 0x116c9, 0x116d9, 0x116e3, 0x11739, 0x118e9, 0x11959, 0x11bf9, 0x11c59, + 0x11d59, 0x11da9, 0x11f59, 0x16139, 0x16a69, 0x16ac9, 0x16b59, 0x16d79, + 0x1ccf9, 0x1d7d7, 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x1e149, 0x1e2f9, + 0x1e4f9, 0x1e5fa, 0x1e959, 0x1fbf9, }; /* This vector is a list of script bitsets for the Script Extension property. @@ -169,69 +230,117 @@ The number of 32-bit words in each bitset is #defined in pcre2_ucp.h as ucd_script_sets_item_size. 
*/ const uint32_t PRIV(ucd_script_sets)[] = { - 0x00000000u, 0x00000000u, 0x00000000u, - 0x00000080u, 0x00000000u, 0x00000000u, - 0x00000040u, 0x00000000u, 0x00000000u, - 0x00000000u, 0x00004000u, 0x00000000u, - 0x00000002u, 0x00000000u, 0x00000000u, - 0x00800000u, 0x00000000u, 0x00000000u, - 0x00000001u, 0x00000000u, 0x00000000u, - 0x00000000u, 0x00000000u, 0x00000001u, - 0x00000010u, 0x00000000u, 0x00000000u, - 0x00000008u, 0x00000004u, 0x00000000u, - 0x00000008u, 0x40000000u, 0x00000000u, - 0x00000008u, 0x00000040u, 0x00000000u, - 0x00000018u, 0x00000000u, 0x00000000u, - 0x00000028u, 0x00000000u, 0x00000000u, - 0x000000c0u, 0x00000000u, 0x00000000u, - 0x00c00000u, 0x00000000u, 0x00000000u, - 0x00000000u, 0x00000102u, 0x00000000u, - 0x80000000u, 0x00000001u, 0x00000000u, - 0x00000004u, 0x00000008u, 0x00000000u, - 0x00000005u, 0x00000000u, 0x00000000u, - 0x00000004u, 0x00200000u, 0x00000000u, - 0x00000014u, 0x00000000u, 0x00000000u, - 0x00000040u, 0x00008000u, 0x00000000u, - 0x00000040u, 0x00000000u, 0x00000001u, - 0x00000040u, 0x00001000u, 0x00000000u, - 0x00000840u, 0x00000000u, 0x00000000u, - 0x00020001u, 0x00000000u, 0x00000000u, - 0x00000800u, 0x00008000u, 0x00000000u, - 0x00000200u, 0x00010000u, 0x00000000u, - 0x00000100u, 0x02000000u, 0x00000000u, - 0x00800001u, 0x00000000u, 0x00000000u, - 0x00300000u, 0x00000000u, 0x00000000u, - 0x00002000u, 0x00000000u, 0x00000001u, - 0x00080001u, 0x00000000u, 0x00000000u, - 0x00000000u, 0x00080000u, 0x00000008u, - 0x00080000u, 0x00000020u, 0x00000000u, - 0x00000038u, 0x00000000u, 0x00000000u, - 0x00000028u, 0x00000000u, 0x00000002u, - 0x00000080u, 0x00000810u, 0x00000000u, - 0x40010000u, 0x00000800u, 0x00000000u, - 0x80000000u, 0x00000001u, 0x00000004u, - 0x80000000u, 0x00020001u, 0x00000000u, - 0x00002040u, 0x00008000u, 0x00000000u, - 0x00000041u, 0x00008000u, 0x00000000u, - 0x00b00000u, 0x00000000u, 0x00000000u, - 0x00010001u, 0x00000080u, 0x00000000u, - 0x000020c0u, 0x00008000u, 0x00000000u, - 0x1e000000u, 
0x00000000u, 0x00000000u, - 0x00000040u, 0x10040200u, 0x00000000u, - 0x00f40000u, 0x00000000u, 0x00000000u, - 0x00000038u, 0x40000040u, 0x00000002u, - 0x01f40000u, 0x00000000u, 0x00000000u, - 0x00007c40u, 0x00000000u, 0x00000000u, - 0x00000038u, 0x44000040u, 0x00000002u, - 0x000034c0u, 0x01008000u, 0x00000001u, - 0x00000018u, 0xc4480400u, 0x00000008u, - 0x00000340u, 0x11952200u, 0x00000000u, - 0x00007fc1u, 0x01008000u, 0x00000000u, - 0x00007fc1u, 0x01009000u, 0x00000000u, - 0x00002340u, 0x11952200u, 0x00000001u, - 0x00006340u, 0x11952200u, 0x00000001u, - 0x0000ffc0u, 0x3984a010u, 0x00000001u, - 0x2000ffc0u, 0x3984a010u, 0x00000001u, + 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x40200003u, 0x00381901u, 0x00100246u, 0x00000000u, + 0x00040305u, 0x00800000u, 0x08000000u, 0x00000000u, + 0x20000001u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000001u, 0x00800000u, 0x00000000u, 0x00000000u, + 0x00040001u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x01000007u, 0x00000840u, 0x80000200u, 0x00000000u, + 0x01000007u, 0x00000040u, 0x80010000u, 0x00000001u, + 0x01000005u, 0x00002000u, 0x00000000u, 0x00000000u, + 0x00040041u, 0x00001000u, 0x80000000u, 0x00000000u, + 0x01000047u, 0x00002801u, 0x00010001u, 0x00000001u, + 0x10000001u, 0x00001801u, 0x00000004u, 0x00000000u, + 0x00000007u, 0x00000000u, 0x00000200u, 0x00000000u, + 0x00000051u, 0x00002840u, 0x00000202u, 0x00000001u, + 0x0000005fu, 0x00000041u, 0x00000202u, 0x00000000u, + 0x00000001u, 0x00002000u, 0x00000000u, 0x00000000u, + 0x00000041u, 0x00000000u, 0x00000002u, 0x00000000u, + 0x01000005u, 0x00000000u, 0x00010000u, 0x00000000u, + 0x01000001u, 0x00000040u, 0x00000000u, 0x00000000u, + 0x00000001u, 0x00000000u, 0x80000000u, 0x00000000u, + 0x00800001u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000005u, 0x00000000u, 0x00000000u, 0x00000001u, + 0x00000003u, 0x00000000u, 0x00000200u, 0x00000001u, + 0x00000041u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x11000041u, 0x00000000u, 0x00000002u, 0x00000000u, + 
0x01000041u, 0x00000000u, 0x00000002u, 0x00000000u, + 0x00000041u, 0x00000000u, 0x80000000u, 0x00000000u, + 0x01000041u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x01040001u, 0x00000001u, 0x80000001u, 0x00000000u, + 0x00000002u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000001u, 0x00000000u, 0x00010000u, 0x00000000u, + 0x00000001u, 0x00000000u, 0x00000001u, 0x00000001u, + 0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000002u, 0x00000800u, 0x00000000u, 0x00000000u, + 0x00000004u, 0x00000000u, 0x00000200u, 0x00000000u, + 0x00000004u, 0x00001000u, 0x00000000u, 0x00000000u, + 0x00000005u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00200008u, 0x00001000u, 0x00000000u, 0x00000000u, + 0x000000e0u, 0x00010000u, 0x11200000u, 0x00000000u, + 0x000000e0u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x000000e0u, 0x00010000u, 0x11208000u, 0x00000000u, + 0x00000060u, 0x08000000u, 0x04608480u, 0x00000000u, + 0x00000060u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x000000a0u, 0x00000000u, 0x01000000u, 0x00000000u, + 0x00000020u, 0x00000000u, 0x00200000u, 0x00000000u, + 0x0001ff01u, 0x40000000u, 0x00001008u, 0x00000000u, + 0x0001ff01u, 0x00000000u, 0x00001008u, 0x00000000u, + 0x0003ff00u, 0x80004000u, 0x409c1848u, 0x00000000u, + 0x0003ff00u, 0x80004020u, 0x609c1848u, 0x00000000u, + 0x00000100u, 0x04000000u, 0x00080040u, 0x00000000u, + 0x00000200u, 0x10004000u, 0x00000000u, 0x00000000u, + 0x00000400u, 0x00000000u, 0x00002000u, 0x00000000u, + 0x00000800u, 0x00000000u, 0x00000010u, 0x00000000u, + 0x00002000u, 0x00000000u, 0x00000008u, 0x00000000u, + 0x00008000u, 0x00000000u, 0x00800000u, 0x00000002u, + 0x00100000u, 0x10000040u, 0x00000000u, 0x00000000u, + 0x00200001u, 0x00001000u, 0x00000000u, 0x00000000u, + 0x02000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x0000001eu, 0x00000000u, 0x00000000u, + 0x04000000u, 0x00008000u, 0x00000000u, 0x00000000u, + 0x00008300u, 0x00000000u, 0x00000008u, 0x00000000u, + 0x00000100u, 0x00000000u, 0x00000000u, 0x00000000u, + 
0x00008100u, 0x00000000u, 0x00000008u, 0x00000000u, + 0x00000300u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000100u, 0x40000000u, 0x00000000u, 0x00000000u, + 0x0001f100u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000100u, 0x00000000u, 0x00800000u, 0x00000000u, + 0x0003d300u, 0x00000000u, 0x00801008u, 0x00000002u, + 0x00000100u, 0x00000000u, 0x00000008u, 0x00000000u, + 0x00008100u, 0x00000000u, 0x00000008u, 0x00000002u, + 0x00000200u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00800000u, 0x00000000u, + 0x00000045u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000040u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x04000001u, 0x00008000u, 0x00000000u, 0x00000000u, + 0x00000020u, 0x00000000u, 0x00008000u, 0x00000000u, + 0x00200000u, 0x020c1000u, 0x00004000u, 0x00000000u, + 0x00000002u, 0x20080000u, 0x00004000u, 0x00000000u, + 0x00000101u, 0x00000000u, 0x00000008u, 0x00000000u, + 0x00000001u, 0x00000800u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x02200000u, 0x00000000u, 0x00000000u, + 0x00200000u, 0x04780000u, 0x00004000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x00000002u, 0x00000000u, + 0x00000020u, 0x00000000u, 0x0000c000u, 0x00000000u, + 0x40000000u, 0x00000000u, 0x00020000u, 0x00000000u, + 0xfc400000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0xfc400000u, 0x00008000u, 0x00000000u, 0x00000000u, + 0x78400000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x40000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0xfc480000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0xfc480000u, 0x00800000u, 0x00000000u, 0x00000000u, + 0xf8400000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x60000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x18000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x58000000u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x40000001u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00018d00u, 0xc4000000u, 0x00881950u, 0x00000002u, + 0x00008d00u, 0xc4000000u, 0x00881950u, 0x00000002u, + 0x00000d00u, 0x84000000u, 0x00081950u, 0x00000000u, + 
0x00000d00u, 0xc4000000u, 0x00081950u, 0x00000000u, + 0x00000300u, 0x00000000u, 0x00000000u, 0x00000002u, + 0x00002100u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00100001u, 0x00020000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x01000400u, 0x00000000u, 0x00000000u, + 0x00000020u, 0x00010000u, 0x00000000u, 0x00000000u, + 0x000000a0u, 0x00000000u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000280u, 0x02000000u, 0x00000000u, + 0x00000000u, 0x00000280u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000280u, 0x00000020u, 0x00000000u, + 0x00000020u, 0x00000800u, 0x00000000u, 0x00000000u, + 0x00000000u, 0x00000000u, 0x04000080u, 0x00000000u, }; /* This vector is a list of bitsets for Boolean properties. The number of @@ -241,181 +350,196 @@ pcre2_ucp.h. */ const uint32_t PRIV(ucd_boolprop_sets)[] = { 0x00000000u, 0x00000000u, 0x00000001u, 0x00000000u, - 0x00000001u, 0x00020040u, - 0x00800001u, 0x00020040u, - 0x00800001u, 0x00002820u, - 0x00800001u, 0x00000120u, - 0x00830001u, 0x00000020u, - 0x00800001u, 0x00000020u, - 0x00800021u, 0x00000120u, - 0x00800011u, 0x00000020u, - 0x00800001u, 0x00000028u, - 0x00800001u, 0x00002020u, - 0x00801001u, 0x00000020u, - 0x00800021u, 0x00002820u, - 0x24830003u, 0x00040000u, - 0x00800021u, 0x00002020u, - 0x00800011u, 0x00000028u, - 0x648003c7u, 0x000c8000u, - 0x608003c5u, 0x000c8000u, - 0x00808021u, 0x00000028u, - 0x20800001u, 0x00040000u, - 0x00808021u, 0x00000020u, - 0x64800d47u, 0x000c0004u, - 0x60800d45u, 0x000c0004u, - 0x60800d45u, 0x000c1004u, - 0x00000000u, 0x00020040u, - 0x00800000u, 0x00020000u, - 0x00800000u, 0x00000020u, + 0x00000001u, 0x00400800u, + 0x00800001u, 0x00400800u, + 0x00800001u, 0x00050400u, + 0x00800001u, 0x00002400u, + 0x00830001u, 0x00000400u, + 0x00800001u, 0x00000400u, + 0x00800021u, 0x00002400u, + 0x00800011u, 0x00000400u, + 0x00800001u, 0x00000480u, + 0x00800001u, 0x00040400u, + 0x00801001u, 0x00000400u, + 0x00800021u, 0x00050400u, + 0x04830003u, 0x00800001u, + 0x00800021u, 0x00040400u, + 0x00800011u, 
0x00000480u, + 0x048003c7u, 0x01900003u, + 0x008003c5u, 0x01900003u, + 0x00808021u, 0x00000480u, + 0x00800001u, 0x00800001u, + 0x00808021u, 0x00000400u, + 0x04800d47u, 0x01800043u, + 0x00800d45u, 0x01800043u, + 0x00800d45u, 0x01820043u, + 0x00000000u, 0x00400800u, + 0x00800000u, 0x00400000u, + 0x00800000u, 0x00000400u, 0x00808020u, 0x00000000u, - 0x00a10000u, 0x00000020u, - 0x60800044u, 0x000c0004u, - 0x00800010u, 0x00000120u, - 0x00800000u, 0x00000028u, + 0x00a10000u, 0x00000400u, + 0x00800044u, 0x01800043u, + 0x00800010u, 0x00002400u, + 0x00800000u, 0x00000480u, 0x00002020u, 0x00000000u, + 0x40800000u, 0x00000000u, + 0x00800dc4u, 0x01800043u, + 0x00c08020u, 0x00800001u, 0x00800000u, 0x00000000u, - 0x60800dc4u, 0x000c0004u, - 0x20c08020u, 0x00040000u, - 0x608003c4u, 0x000c8000u, - 0x60800d44u, 0x000c0004u, - 0x60800d44u, 0x000c1004u, - 0x60804dc4u, 0x000c0004u, - 0x60800004u, 0x000c0000u, - 0x608007c4u, 0x000c8000u, - 0x60800bc4u, 0x000c0000u, - 0x60808064u, 0x000c0004u, - 0x60808064u, 0x000c1004u, - 0x60808024u, 0x000c0000u, - 0x60c08024u, 0x000c0000u, - 0x21008020u, 0x00040000u, - 0x21008de4u, 0x00040004u, - 0x21002020u, 0x00040000u, - 0x21000020u, 0x00040000u, - 0x60808064u, 0x00000004u, - 0x00800000u, 0x00002000u, - 0x20800020u, 0x00042000u, - 0x60800dc4u, 0x000c000cu, - 0x60800044u, 0x000c8008u, - 0x60800044u, 0x000c8000u, - 0x608003c4u, 0x000c8008u, - 0x00800000u, 0x00000008u, - 0x01000020u, 0x00000000u, + 0x008003c4u, 0x01900003u, + 0x00800d44u, 0x01800043u, + 0x00800d44u, 0x01820043u, + 0x00804dc4u, 0x01800043u, + 0x00800004u, 0x01800003u, + 0x008007c4u, 0x01900003u, + 0x00800bc4u, 0x01800003u, + 0x00808064u, 0x01800043u, + 0x00808064u, 0x01820043u, + 0x00808024u, 0x01800003u, + 0x00c08024u, 0x01800003u, + 0x01008020u, 0x00800009u, + 0x01008de4u, 0x00800049u, + 0x01002020u, 0x00800009u, + 0x01000020u, 0x00800009u, + 0x01000024u, 0x00800009u, + 0x00808064u, 0x00000043u, + 0x00800000u, 0x00040000u, + 0x00800020u, 0x00840001u, + 0x00800dc4u, 0x018000c3u, + 
0x00800044u, 0x01900083u, + 0x00800044u, 0x01900003u, + 0x008003c4u, 0x01900083u, + 0x00800000u, 0x00000080u, + 0x01000020u, 0x00000008u, 0x00800020u, 0x00000000u, - 0x00800000u, 0x00002800u, + 0x00800000u, 0x00050000u, 0x00801000u, 0x00000000u, - 0x21008024u, 0x00040000u, - 0x21000024u, 0x00040000u, - 0x00000020u, 0x00000080u, + 0x01008024u, 0x00800009u, + 0x00000020u, 0x00001000u, 0x00002028u, 0x00000000u, - 0x60c00024u, 0x000c0000u, - 0x20800000u, 0x00040000u, - 0x60804004u, 0x000c0000u, - 0x60800024u, 0x000c0000u, - 0x20800004u, 0x00040000u, - 0x23008020u, 0x00040000u, - 0x21000004u, 0x00040000u, - 0x21408020u, 0x00040000u, - 0x60800004u, 0x00040000u, - 0x23000024u, 0x00040000u, - 0x60800004u, 0x000c0002u, + 0x00c00024u, 0x01800003u, + 0x01000024u, 0x00800109u, + 0x01008020u, 0x00800109u, + 0x00800000u, 0x00800001u, + 0x00804004u, 0x01800003u, + 0x00800024u, 0x01800003u, + 0x01000020u, 0x00800109u, + 0x01008024u, 0x00800109u, + 0x00800004u, 0x00800001u, + 0x00800004u, 0x0180000bu, + 0x03008020u, 0x00800009u, + 0x01000004u, 0x00800009u, + 0x01400024u, 0x00800009u, + 0x01408020u, 0x00800009u, + 0x00800004u, 0x00800003u, + 0x03008024u, 0x00800009u, + 0x00800004u, 0x01800023u, 0x00800010u, 0x00000000u, - 0x20808000u, 0x00040000u, - 0x21004024u, 0x00040000u, - 0x20808004u, 0x00040000u, - 0x60800944u, 0x000c0004u, - 0x60800064u, 0x000c0004u, - 0x60802004u, 0x000c0000u, - 0x60800344u, 0x000c8000u, - 0x22808000u, 0x00040000u, - 0x22800000u, 0x00040000u, + 0x00808000u, 0x00800001u, + 0x01004024u, 0x00800009u, + 0x00808004u, 0x00800001u, + 0x00800944u, 0x01800043u, + 0x00800064u, 0x01800043u, + 0x00802004u, 0x01800003u, + 0x00800344u, 0x01900003u, + 0x03008000u, 0x00800009u, 0x00c00000u, 0x00000000u, - 0x21002020u, 0x00050000u, - 0x61000024u, 0x000c0000u, - 0x23000020u, 0x00040000u, - 0x01008020u, 0x00000000u, - 0x21408024u, 0x00040000u, + 0x01002020u, 0x00a00009u, + 0x01000024u, 0x0180000bu, + 0x01008020u, 0x00000008u, + 0x01408024u, 0x00800009u, 0x00808000u, 
0x00000000u, - 0x60800044u, 0x000c1004u, - 0x60800064u, 0x000c1004u, - 0x01002020u, 0x00000001u, - 0x00022020u, 0x00000001u, - 0x00002028u, 0x00000040u, - 0x00801000u, 0x00000020u, - 0x00800020u, 0x00000120u, - 0x00800000u, 0x00000120u, - 0x00800020u, 0x00000020u, - 0x00a10000u, 0x00002820u, - 0x00800000u, 0x00002820u, - 0x20800000u, 0x00040008u, - 0x00800010u, 0x00000020u, - 0x00002020u, 0x00000008u, + 0x00800044u, 0x01820043u, + 0x00800064u, 0x01820043u, + 0x01002020u, 0x00800011u, + 0x00022020u, 0x00800019u, + 0x00002028u, 0x00000800u, + 0x00801000u, 0x00000400u, + 0x00800020u, 0x00002400u, + 0x00800000u, 0x00002400u, + 0x00800020u, 0x00050400u, + 0x00800020u, 0x00000400u, + 0x00a10000u, 0x00050400u, + 0x00800000u, 0x00050400u, + 0x00800000u, 0x00800081u, + 0x00800010u, 0x00000400u, + 0x00002020u, 0x00000080u, 0x00002000u, 0x00000000u, 0x00006020u, 0x00000000u, - 0x00801000u, 0x00000008u, - 0x00800010u, 0x00000008u, - 0x21000020u, 0x00040008u, - 0x01020020u, 0x00000000u, - 0x60800044u, 0x000c000cu, - 0x60800000u, 0x000c0008u, + 0x40800000u, 0x00000080u, + 0x40801000u, 0x00000080u, + 0x40800010u, 0x00000080u, + 0x01000020u, 0x00800089u, + 0x01020020u, 0x00000008u, + 0x00800044u, 0x018000c3u, + 0x00800000u, 0x01800083u, 0x00a10000u, 0x00000000u, - 0x60800000u, 0x000c0000u, - 0x60800004u, 0x000c0008u, - 0x60a10044u, 0x000c0004u, - 0x60800044u, 0x000c100cu, - 0x00a10000u, 0x00000028u, - 0x00800010u, 0x00000028u, - 0x00801000u, 0x00000028u, - 0x00b10000u, 0x00000020u, - 0x00804010u, 0x00000020u, - 0x00a00000u, 0x00000020u, - 0x00000000u, 0x00000020u, - 0x008003c4u, 0x00008000u, - 0x00a103c4u, 0x00008000u, - 0x00800d44u, 0x00000004u, - 0x00b10000u, 0x00000028u, - 0x00a00000u, 0x00000028u, - 0x00a90000u, 0x00000020u, - 0x00b90000u, 0x00000020u, - 0x00808024u, 0x00000020u, - 0x00800000u, 0x00002020u, - 0x00800000u, 0x00000200u, + 0x00800000u, 0x01800003u, + 0x00800004u, 0x01800083u, + 0x00a10044u, 0x01800043u, + 0x00800044u, 0x018200c3u, + 0x00a10000u, 0x00000480u, + 
0xc0800000u, 0x00000480u, + 0x00800010u, 0x00000480u, + 0x00801000u, 0x00000480u, + 0x00b10000u, 0x00000400u, + 0x00804010u, 0x00000400u, + 0x00a00000u, 0x00000400u, + 0x00000000u, 0x00000400u, + 0x008003c4u, 0x00100000u, + 0x00a103c4u, 0x00100000u, + 0x00800d44u, 0x00000040u, + 0x00b10000u, 0x00000480u, + 0x00a00000u, 0x00000480u, + 0x00a90000u, 0x00000400u, + 0x00b90000u, 0x00000400u, + 0x03000020u, 0x00800009u, + 0x00808024u, 0x00000400u, + 0x00800000u, 0x00040400u, + 0x00800000u, 0x00004000u, 0x08800000u, 0x00000000u, 0x10800000u, 0x00000000u, - 0xe0800004u, 0x000c0000u, - 0x21008000u, 0x00040000u, - 0x00a11000u, 0x00000020u, - 0x60808020u, 0x00000000u, - 0xe0800004u, 0x000c4000u, - 0x60808004u, 0x000c0000u, - 0x60800004u, 0x00000000u, - 0x00000000u, 0x00000010u, - 0x21022020u, 0x00050000u, - 0x00800000u, 0x00000100u, - 0x00800020u, 0x00002800u, + 0x20800000u, 0x00000000u, + 0x00800004u, 0x01800007u, + 0x01008000u, 0x00800009u, + 0x00a11000u, 0x00000400u, + 0x00808020u, 0x00000003u, + 0x00800004u, 0x01880007u, + 0x00808004u, 0x01800003u, + 0x00800004u, 0x00000003u, + 0x00000000u, 0x00000200u, + 0x01022020u, 0x00a00009u, + 0x00800000u, 0x00002000u, + 0x00800020u, 0x00050000u, + 0x00800020u, 0x00040000u, + 0x00801000u, 0x00000080u, + 0x00800010u, 0x00000080u, 0x00800020u, 0x00002000u, - 0x00800020u, 0x00000100u, - 0x24800000u, 0x00040000u, - 0x648003c4u, 0x000c8000u, - 0x00808020u, 0x00000008u, - 0x64800d44u, 0x000c0004u, - 0x00800010u, 0x00000100u, - 0x61008024u, 0x00040000u, + 0x04800000u, 0x00800001u, + 0x048003c4u, 0x01900003u, + 0x00808020u, 0x00000080u, + 0x04800d44u, 0x01800043u, + 0x00800010u, 0x00002000u, + 0x01008024u, 0x0080000bu, 0x00000020u, 0x00000000u, - 0x60c00004u, 0x000c0000u, - 0x21400020u, 0x00040000u, - 0xa1000020u, 0x00040000u, - 0x21000000u, 0x00040000u, + 0x00c00004u, 0x01800003u, + 0x00c08004u, 0x01800003u, + 0x01400020u, 0x00800009u, + 0x01000020u, 0x0080000du, + 0x01008004u, 0x00800009u, + 0x01000000u, 0x00800009u, + 0xc0800000u, 
0x00000080u, 0x00a00000u, 0x00000000u, 0x00b10000u, 0x00000000u, 0x00200000u, 0x00000000u, - 0x00800044u, 0x00008000u, - 0x00a10044u, 0x00008000u, - 0x00930000u, 0x00000400u, + 0x00800044u, 0x00100000u, + 0x00a10044u, 0x00100000u, + 0x00930000u, 0x00008000u, 0x00b90000u, 0x00000000u, 0x00a90000u, 0x00000000u, - 0x00970020u, 0x00000000u, + 0x00970020u, 0x00000008u, 0x00b30000u, 0x00000000u, - 0x01022020u, 0x00000000u, + 0x01022020u, 0x00000008u, }; /* These are the main two-stage UCD tables. The fields in each record are: @@ -424,1430 +548,1550 @@ offset to multichar other cases or zero (8 bits), offset to other case or zero (32 bits, signed), bidi class (5 bits) and script extension (11 bits) packed into a 16-bit field, and offset in binary properties table (16 bits). */ -const ucd_record PRIV(ucd_records)[] = { /* 17076 bytes, record size 12 */ - { 69, 0, 2, 0, 0, 6144, 2, }, /* 0 */ - { 69, 0, 2, 0, 0, 43008, 4, }, /* 1 */ - { 69, 0, 1, 0, 0, 4096, 4, }, /* 2 */ - { 69, 0, 2, 0, 0, 45056, 4, }, /* 3 */ - { 69, 0, 0, 0, 0, 4096, 4, }, /* 4 */ - { 69, 0, 2, 0, 0, 4096, 2, }, /* 5 */ - { 69, 0, 2, 0, 0, 43008, 2, }, /* 6 */ - { 69, 29, 12, 0, 0, 45056, 6, }, /* 7 */ - { 69, 21, 12, 0, 0, 28672, 8, }, /* 8 */ - { 69, 21, 12, 0, 0, 28672, 10, }, /* 9 */ - { 69, 21, 12, 0, 0, 14336, 12, }, /* 10 */ - { 69, 23, 12, 0, 0, 14336, 14, }, /* 11 */ - { 69, 21, 12, 0, 0, 14336, 14, }, /* 12 */ - { 69, 21, 12, 0, 0, 28672, 14, }, /* 13 */ - { 69, 21, 12, 0, 0, 28672, 16, }, /* 14 */ - { 69, 22, 12, 0, 0, 28672, 18, }, /* 15 */ - { 69, 18, 12, 0, 0, 28672, 18, }, /* 16 */ - { 69, 21, 12, 0, 0, 28672, 12, }, /* 17 */ - { 69, 25, 12, 0, 0, 12288, 20, }, /* 18 */ - { 69, 21, 12, 0, 0, 8192, 22, }, /* 19 */ - { 69, 17, 12, 0, 0, 12288, 24, }, /* 20 */ - { 69, 21, 12, 0, 0, 8192, 26, }, /* 21 */ - { 69, 21, 12, 0, 0, 8192, 14, }, /* 22 */ - { 69, 13, 12, 0, 0, 10240, 28, }, /* 23 */ - { 69, 21, 12, 0, 0, 8192, 30, }, /* 24 */ - { 69, 21, 12, 0, 0, 28672, 22, }, /* 25 */ - { 69, 25, 
12, 0, 0, 28672, 32, }, /* 26 */ - { 69, 25, 12, 0, 0, 28672, 20, }, /* 27 */ +const ucd_record PRIV(ucd_records)[] = { /* 18516 bytes, record size 12 */ + { 99, 0, 2, 0, 0, 6144, 2, }, /* 0 */ + { 99, 0, 2, 0, 0, 43008, 4, }, /* 1 */ + { 99, 0, 1, 0, 0, 4096, 4, }, /* 2 */ + { 99, 0, 2, 0, 0, 45056, 4, }, /* 3 */ + { 99, 0, 0, 0, 0, 4096, 4, }, /* 4 */ + { 99, 0, 2, 0, 0, 4096, 2, }, /* 5 */ + { 99, 0, 2, 0, 0, 43008, 2, }, /* 6 */ + { 99, 29, 12, 0, 0, 45056, 6, }, /* 7 */ + { 99, 21, 12, 0, 0, 28672, 8, }, /* 8 */ + { 99, 21, 12, 0, 0, 28672, 10, }, /* 9 */ + { 99, 21, 12, 0, 0, 14336, 12, }, /* 10 */ + { 99, 23, 12, 0, 0, 14336, 14, }, /* 11 */ + { 99, 21, 12, 0, 0, 14336, 14, }, /* 12 */ + { 99, 21, 12, 0, 0, 28672, 14, }, /* 13 */ + { 99, 21, 12, 0, 0, 28672, 16, }, /* 14 */ + { 99, 22, 12, 0, 0, 28672, 18, }, /* 15 */ + { 99, 18, 12, 0, 0, 28672, 18, }, /* 16 */ + { 99, 21, 12, 0, 0, 28672, 12, }, /* 17 */ + { 99, 25, 12, 0, 0, 12288, 20, }, /* 18 */ + { 99, 21, 12, 0, 0, 8192, 22, }, /* 19 */ + { 99, 17, 12, 0, 0, 12288, 24, }, /* 20 */ + { 99, 21, 12, 0, 0, 8192, 26, }, /* 21 */ + { 99, 21, 12, 0, 0, 8192, 14, }, /* 22 */ + { 99, 13, 12, 0, 0, 10240, 28, }, /* 23 */ + { 99, 21, 12, 0, 0, 8192, 30, }, /* 24 */ + { 99, 21, 12, 0, 0, 28672, 22, }, /* 25 */ + { 99, 25, 12, 0, 0, 28672, 32, }, /* 26 */ + { 99, 25, 12, 0, 0, 28672, 20, }, /* 27 */ { 0, 9, 12, 0, 32, 18432, 34, }, /* 28 */ { 0, 9, 12, 0, 32, 18432, 36, }, /* 29 */ { 0, 9, 12, 100, 32, 18432, 36, }, /* 30 */ { 0, 9, 12, 1, 32, 18432, 36, }, /* 31 */ - { 69, 24, 12, 0, 0, 28672, 38, }, /* 32 */ - { 69, 16, 12, 0, 0, 28672, 40, }, /* 33 */ - { 69, 24, 12, 0, 0, 28672, 42, }, /* 34 */ + { 99, 24, 12, 0, 0, 28672, 38, }, /* 32 */ + { 99, 16, 12, 0, 0, 28672, 40, }, /* 33 */ + { 99, 24, 12, 0, 0, 28672, 42, }, /* 34 */ { 0, 5, 12, 0, -32, 18432, 44, }, /* 35 */ { 0, 5, 12, 0, -32, 18432, 46, }, /* 36 */ { 0, 5, 12, 0, -32, 18432, 48, }, /* 37 */ { 0, 5, 12, 100, -32, 18432, 46, }, /* 38 */ { 0, 5, 12, 
1, -32, 18432, 46, }, /* 39 */ - { 69, 0, 2, 0, 0, 6144, 0, }, /* 40 */ - { 69, 0, 2, 0, 0, 4096, 50, }, /* 41 */ - { 69, 29, 12, 0, 0, 8192, 52, }, /* 42 */ - { 69, 21, 12, 0, 0, 28672, 54, }, /* 43 */ - { 69, 23, 12, 0, 0, 14336, 54, }, /* 44 */ - { 69, 26, 12, 0, 0, 28672, 54, }, /* 45 */ - { 69, 24, 12, 0, 0, 28672, 56, }, /* 46 */ - { 69, 26, 14, 0, 0, 28672, 58, }, /* 47 */ + { 99, 0, 2, 0, 0, 6144, 0, }, /* 40 */ + { 99, 0, 2, 0, 0, 4096, 50, }, /* 41 */ + { 99, 29, 12, 0, 0, 8192, 52, }, /* 42 */ + { 99, 21, 12, 0, 0, 28672, 54, }, /* 43 */ + { 99, 23, 12, 0, 0, 14336, 54, }, /* 44 */ + { 99, 26, 12, 0, 0, 28672, 54, }, /* 45 */ + { 99, 24, 12, 0, 0, 28672, 56, }, /* 46 */ + { 99, 26, 14, 0, 0, 28672, 58, }, /* 47 */ { 0, 7, 12, 0, 0, 18432, 60, }, /* 48 */ - { 69, 20, 12, 0, 0, 28672, 62, }, /* 49 */ - { 69, 25, 12, 0, 0, 28672, 64, }, /* 50 */ - { 69, 1, 2, 0, 0, 6144, 66, }, /* 51 */ - { 69, 26, 12, 0, 0, 14336, 54, }, /* 52 */ - { 69, 25, 12, 0, 0, 14336, 64, }, /* 53 */ - { 69, 15, 12, 0, 0, 10240, 68, }, /* 54 */ - { 69, 5, 12, 26, 775, 18432, 70, }, /* 55 */ - { 69, 21, 12, 0, 0, 28672, 72, }, /* 56 */ - { 69, 19, 12, 0, 0, 28672, 62, }, /* 57 */ - { 69, 15, 12, 0, 0, 28672, 68, }, /* 58 */ - { 0, 9, 12, 0, 32, 18432, 74, }, /* 59 */ - { 0, 9, 12, 104, 32, 18432, 74, }, /* 60 */ + { 99, 20, 12, 0, 0, 28672, 62, }, /* 49 */ + { 99, 25, 12, 0, 0, 28672, 64, }, /* 50 */ + { 99, 1, 2, 0, 0, 6144, 66, }, /* 51 */ + { 99, 26, 12, 0, 0, 14336, 54, }, /* 52 */ + { 99, 25, 12, 0, 0, 14336, 64, }, /* 53 */ + { 99, 15, 12, 0, 0, 10240, 68, }, /* 54 */ + { 99, 5, 12, 26, 775, 18432, 70, }, /* 55 */ + { 99, 21, 12, 0, 0, 28676, 72, }, /* 56 */ + { 99, 19, 12, 0, 0, 28672, 62, }, /* 57 */ + { 99, 15, 12, 0, 0, 28672, 74, }, /* 58 */ + { 0, 9, 12, 0, 32, 18432, 76, }, /* 59 */ + { 0, 9, 12, 104, 32, 18432, 76, }, /* 60 */ { 0, 5, 12, 0, 7615, 18432, 70, }, /* 61 */ - { 0, 5, 12, 0, -32, 18432, 76, }, /* 62 */ - { 0, 5, 12, 104, -32, 18432, 76, }, /* 63 */ - { 0, 5, 
12, 0, 121, 18432, 76, }, /* 64 */ - { 0, 9, 12, 0, 1, 18432, 74, }, /* 65 */ - { 0, 5, 12, 0, -1, 18432, 76, }, /* 66 */ - { 0, 5, 12, 0, -1, 18432, 78, }, /* 67 */ - { 0, 9, 12, 0, 0, 18432, 74, }, /* 68 */ - { 0, 5, 12, 0, 0, 18432, 76, }, /* 69 */ + { 0, 5, 12, 0, -32, 18432, 78, }, /* 62 */ + { 0, 5, 12, 104, -32, 18432, 78, }, /* 63 */ + { 0, 5, 12, 0, 121, 18432, 78, }, /* 64 */ + { 0, 9, 12, 0, 1, 18432, 76, }, /* 65 */ + { 0, 5, 12, 0, -1, 18432, 78, }, /* 66 */ + { 0, 5, 12, 0, -1, 18432, 80, }, /* 67 */ + { 0, 9, 12, 0, 0, 18432, 76, }, /* 68 */ + { 0, 5, 12, 0, 0, 18432, 78, }, /* 69 */ { 0, 5, 12, 0, 0, 18432, 60, }, /* 70 */ - { 0, 5, 12, 0, 0, 18432, 80, }, /* 71 */ - { 0, 9, 12, 0, -121, 18432, 74, }, /* 72 */ + { 0, 5, 12, 0, 0, 18432, 82, }, /* 71 */ + { 0, 9, 12, 0, -121, 18432, 76, }, /* 72 */ { 0, 5, 12, 1, 0, 18432, 70, }, /* 73 */ - { 0, 5, 12, 0, 195, 18432, 76, }, /* 74 */ - { 0, 9, 12, 0, 210, 18432, 74, }, /* 75 */ - { 0, 9, 12, 0, 206, 18432, 74, }, /* 76 */ - { 0, 9, 12, 0, 205, 18432, 74, }, /* 77 */ - { 0, 9, 12, 0, 79, 18432, 74, }, /* 78 */ - { 0, 9, 12, 0, 202, 18432, 74, }, /* 79 */ - { 0, 9, 12, 0, 203, 18432, 74, }, /* 80 */ - { 0, 9, 12, 0, 207, 18432, 74, }, /* 81 */ - { 0, 5, 12, 0, 97, 18432, 76, }, /* 82 */ - { 0, 9, 12, 0, 211, 18432, 74, }, /* 83 */ - { 0, 9, 12, 0, 209, 18432, 74, }, /* 84 */ - { 0, 5, 12, 0, 163, 18432, 76, }, /* 85 */ - { 0, 9, 12, 0, 213, 18432, 74, }, /* 86 */ - { 0, 5, 12, 0, 130, 18432, 76, }, /* 87 */ - { 0, 9, 12, 0, 214, 18432, 74, }, /* 88 */ - { 0, 9, 12, 0, 218, 18432, 74, }, /* 89 */ - { 0, 9, 12, 0, 217, 18432, 74, }, /* 90 */ - { 0, 9, 12, 0, 219, 18432, 74, }, /* 91 */ - { 0, 7, 12, 0, 0, 18432, 82, }, /* 92 */ - { 0, 5, 12, 0, 56, 18432, 76, }, /* 93 */ - { 0, 9, 12, 5, 2, 18432, 84, }, /* 94 */ - { 0, 8, 12, 5, 1, 18432, 86, }, /* 95 */ - { 0, 5, 12, 5, -2, 18432, 76, }, /* 96 */ - { 0, 9, 12, 9, 2, 18432, 84, }, /* 97 */ - { 0, 8, 12, 9, 1, 18432, 86, }, /* 98 */ - { 0, 5, 12, 9, -2, 
18432, 76, }, /* 99 */ - { 0, 9, 12, 13, 2, 18432, 84, }, /* 100 */ - { 0, 8, 12, 13, 1, 18432, 86, }, /* 101 */ - { 0, 5, 12, 13, -2, 18432, 76, }, /* 102 */ - { 0, 5, 12, 0, -79, 18432, 76, }, /* 103 */ - { 0, 9, 12, 17, 2, 18432, 84, }, /* 104 */ - { 0, 8, 12, 17, 1, 18432, 86, }, /* 105 */ - { 0, 5, 12, 17, -2, 18432, 76, }, /* 106 */ - { 0, 9, 12, 0, -97, 18432, 74, }, /* 107 */ - { 0, 9, 12, 0, -56, 18432, 74, }, /* 108 */ - { 0, 9, 12, 0, -130, 18432, 74, }, /* 109 */ - { 0, 9, 12, 0, 10795, 18432, 74, }, /* 110 */ - { 0, 9, 12, 0, -163, 18432, 74, }, /* 111 */ - { 0, 9, 12, 0, 10792, 18432, 74, }, /* 112 */ - { 0, 5, 12, 0, 10815, 18432, 76, }, /* 113 */ - { 0, 9, 12, 0, -195, 18432, 74, }, /* 114 */ - { 0, 9, 12, 0, 69, 18432, 74, }, /* 115 */ - { 0, 9, 12, 0, 71, 18432, 74, }, /* 116 */ - { 0, 5, 12, 0, 10783, 18432, 76, }, /* 117 */ - { 0, 5, 12, 0, 10780, 18432, 76, }, /* 118 */ - { 0, 5, 12, 0, 10782, 18432, 76, }, /* 119 */ - { 0, 5, 12, 0, -210, 18432, 76, }, /* 120 */ - { 0, 5, 12, 0, -206, 18432, 76, }, /* 121 */ - { 0, 5, 12, 0, -205, 18432, 76, }, /* 122 */ - { 0, 5, 12, 0, -202, 18432, 76, }, /* 123 */ - { 0, 5, 12, 0, -203, 18432, 76, }, /* 124 */ - { 0, 5, 12, 0, 42319, 18432, 76, }, /* 125 */ - { 0, 5, 12, 0, 42315, 18432, 76, }, /* 126 */ - { 0, 5, 12, 0, -207, 18432, 76, }, /* 127 */ - { 0, 5, 12, 0, 42280, 18432, 76, }, /* 128 */ - { 0, 5, 12, 0, 42308, 18432, 76, }, /* 129 */ - { 0, 5, 12, 0, -209, 18432, 78, }, /* 130 */ - { 0, 5, 12, 0, -211, 18432, 76, }, /* 131 */ - { 0, 5, 12, 0, 10743, 18432, 76, }, /* 132 */ - { 0, 5, 12, 0, 42305, 18432, 76, }, /* 133 */ - { 0, 5, 12, 0, 10749, 18432, 76, }, /* 134 */ - { 0, 5, 12, 0, -213, 18432, 76, }, /* 135 */ - { 0, 5, 12, 0, -214, 18432, 76, }, /* 136 */ - { 0, 5, 12, 0, 10727, 18432, 76, }, /* 137 */ - { 0, 5, 12, 0, -218, 18432, 76, }, /* 138 */ - { 0, 5, 12, 0, 42307, 18432, 76, }, /* 139 */ - { 0, 5, 12, 0, 42282, 18432, 76, }, /* 140 */ - { 0, 5, 12, 0, -69, 18432, 76, }, /* 141 */ - { 
0, 5, 12, 0, -217, 18432, 76, }, /* 142 */ - { 0, 5, 12, 0, -71, 18432, 76, }, /* 143 */ - { 0, 5, 12, 0, -219, 18432, 76, }, /* 144 */ - { 0, 5, 12, 0, 42261, 18432, 78, }, /* 145 */ - { 0, 5, 12, 0, 42258, 18432, 76, }, /* 146 */ - { 0, 6, 12, 0, 0, 18432, 88, }, /* 147 */ - { 0, 6, 12, 0, 0, 18432, 90, }, /* 148 */ - { 69, 6, 12, 0, 0, 28672, 92, }, /* 149 */ - { 69, 6, 12, 0, 0, 18432, 92, }, /* 150 */ - { 69, 6, 12, 0, 0, 18432, 88, }, /* 151 */ - { 69, 6, 12, 0, 0, 18432, 94, }, /* 152 */ - { 22, 24, 12, 0, 0, 28672, 56, }, /* 153 */ - { 84, 12, 3, 0, 0, 26624, 96, }, /* 154 */ - { 84, 12, 3, 0, 0, 26636, 96, }, /* 155 */ - { 84, 12, 3, 21, 116, 26636, 98, }, /* 156 */ - { 84, 12, 3, 0, 0, 26624, 100, }, /* 157 */ - { 84, 12, 3, 0, 0, 26624, 102, }, /* 158 */ - { 84, 12, 3, 0, 0, 26642, 102, }, /* 159 */ - { 1, 9, 12, 0, 1, 18432, 74, }, /* 160 */ - { 1, 5, 12, 0, -1, 18432, 76, }, /* 161 */ - { 1, 24, 12, 0, 0, 28672, 56, }, /* 162 */ - { 68, 2, 12, 0, 0, 18432, 0, }, /* 163 */ - { 1, 6, 12, 0, 0, 18432, 104, }, /* 164 */ - { 1, 5, 12, 0, 130, 18432, 76, }, /* 165 */ - { 69, 21, 12, 0, 0, 28672, 106, }, /* 166 */ - { 1, 9, 12, 0, 116, 18432, 74, }, /* 167 */ - { 1, 9, 12, 0, 38, 18432, 74, }, /* 168 */ - { 69, 21, 12, 0, 0, 28672, 108, }, /* 169 */ - { 1, 9, 12, 0, 37, 18432, 74, }, /* 170 */ - { 1, 9, 12, 0, 64, 18432, 74, }, /* 171 */ - { 1, 9, 12, 0, 63, 18432, 74, }, /* 172 */ - { 1, 5, 12, 0, 0, 18432, 76, }, /* 173 */ - { 1, 9, 12, 0, 32, 18432, 74, }, /* 174 */ - { 1, 9, 12, 34, 32, 18432, 74, }, /* 175 */ - { 1, 9, 12, 59, 32, 18432, 74, }, /* 176 */ - { 1, 9, 12, 38, 32, 18432, 74, }, /* 177 */ - { 1, 9, 12, 21, 32, 18432, 74, }, /* 178 */ - { 1, 9, 12, 51, 32, 18432, 74, }, /* 179 */ - { 1, 9, 12, 26, 32, 18432, 74, }, /* 180 */ - { 1, 9, 12, 47, 32, 18432, 74, }, /* 181 */ - { 1, 9, 12, 55, 32, 18432, 74, }, /* 182 */ - { 1, 9, 12, 30, 32, 18432, 74, }, /* 183 */ - { 1, 9, 12, 43, 32, 18432, 74, }, /* 184 */ - { 1, 9, 12, 96, 32, 18432, 74, }, /* 
185 */ - { 1, 5, 12, 0, -38, 18432, 76, }, /* 186 */ - { 1, 5, 12, 0, -37, 18432, 76, }, /* 187 */ - { 1, 5, 12, 0, -32, 18432, 76, }, /* 188 */ - { 1, 5, 12, 34, -32, 18432, 76, }, /* 189 */ - { 1, 5, 12, 59, -32, 18432, 76, }, /* 190 */ - { 1, 5, 12, 38, -32, 18432, 76, }, /* 191 */ - { 1, 5, 12, 21, -116, 18432, 76, }, /* 192 */ - { 1, 5, 12, 51, -32, 18432, 76, }, /* 193 */ - { 1, 5, 12, 26, -775, 18432, 76, }, /* 194 */ - { 1, 5, 12, 47, -32, 18432, 76, }, /* 195 */ - { 1, 5, 12, 55, -32, 18432, 76, }, /* 196 */ - { 1, 5, 12, 30, 1, 18432, 70, }, /* 197 */ - { 1, 5, 12, 30, -32, 18432, 76, }, /* 198 */ - { 1, 5, 12, 43, -32, 18432, 76, }, /* 199 */ - { 1, 5, 12, 96, -32, 18432, 76, }, /* 200 */ - { 1, 5, 12, 0, -64, 18432, 76, }, /* 201 */ - { 1, 5, 12, 0, -63, 18432, 76, }, /* 202 */ - { 1, 9, 12, 0, 8, 18432, 74, }, /* 203 */ - { 1, 5, 12, 34, -30, 18432, 110, }, /* 204 */ - { 1, 5, 12, 38, -25, 18432, 110, }, /* 205 */ - { 1, 9, 12, 0, 0, 18432, 112, }, /* 206 */ - { 1, 9, 12, 0, 0, 18432, 114, }, /* 207 */ - { 1, 5, 12, 43, -15, 18432, 110, }, /* 208 */ - { 1, 5, 12, 47, -22, 18432, 70, }, /* 209 */ - { 1, 5, 12, 0, -8, 18432, 76, }, /* 210 */ - { 34, 9, 12, 0, 1, 18432, 74, }, /* 211 */ - { 34, 5, 12, 0, -1, 18432, 76, }, /* 212 */ - { 1, 5, 12, 51, -54, 18432, 110, }, /* 213 */ - { 1, 5, 12, 55, -48, 18432, 110, }, /* 214 */ - { 1, 5, 12, 0, 7, 18432, 76, }, /* 215 */ - { 1, 5, 12, 0, -116, 18432, 78, }, /* 216 */ - { 1, 9, 12, 38, -60, 18432, 116, }, /* 217 */ - { 1, 5, 12, 59, -64, 18432, 110, }, /* 218 */ - { 1, 25, 12, 0, 0, 28672, 118, }, /* 219 */ - { 1, 9, 12, 0, -7, 18432, 74, }, /* 220 */ - { 1, 5, 12, 0, 0, 18432, 60, }, /* 221 */ - { 1, 9, 12, 0, -130, 18432, 74, }, /* 222 */ - { 2, 9, 12, 0, 80, 18432, 74, }, /* 223 */ - { 2, 9, 12, 0, 32, 18432, 74, }, /* 224 */ - { 2, 9, 12, 63, 32, 18432, 74, }, /* 225 */ - { 2, 9, 12, 67, 32, 18432, 74, }, /* 226 */ - { 2, 9, 12, 71, 32, 18432, 74, }, /* 227 */ - { 2, 9, 12, 75, 32, 18432, 74, }, /* 228 
*/ - { 2, 9, 12, 79, 32, 18432, 74, }, /* 229 */ - { 2, 9, 12, 84, 32, 18432, 74, }, /* 230 */ - { 2, 5, 12, 0, -32, 18432, 76, }, /* 231 */ - { 2, 5, 12, 63, -32, 18432, 76, }, /* 232 */ - { 2, 5, 12, 67, -32, 18432, 76, }, /* 233 */ - { 2, 5, 12, 71, -32, 18432, 76, }, /* 234 */ - { 2, 5, 12, 75, -32, 18432, 76, }, /* 235 */ - { 2, 5, 12, 79, -32, 18432, 76, }, /* 236 */ - { 2, 5, 12, 84, -32, 18432, 76, }, /* 237 */ - { 2, 5, 12, 0, -80, 18432, 76, }, /* 238 */ - { 2, 5, 12, 0, -80, 18432, 78, }, /* 239 */ - { 2, 9, 12, 0, 1, 18432, 74, }, /* 240 */ - { 2, 5, 12, 0, -1, 18432, 76, }, /* 241 */ - { 2, 9, 12, 88, 1, 18432, 74, }, /* 242 */ - { 2, 5, 12, 88, -1, 18432, 76, }, /* 243 */ - { 2, 26, 12, 0, 0, 18432, 68, }, /* 244 */ - { 2, 12, 3, 0, 0, 26684, 96, }, /* 245 */ - { 2, 12, 3, 0, 0, 26678, 96, }, /* 246 */ - { 84, 12, 3, 0, 0, 26681, 96, }, /* 247 */ - { 2, 11, 3, 0, 0, 26624, 120, }, /* 248 */ - { 2, 9, 12, 0, 15, 18432, 74, }, /* 249 */ - { 2, 5, 12, 0, -15, 18432, 76, }, /* 250 */ - { 70, 9, 12, 0, 48, 18432, 74, }, /* 251 */ - { 70, 6, 12, 0, 0, 18432, 92, }, /* 252 */ - { 70, 21, 12, 0, 0, 18432, 68, }, /* 253 */ - { 70, 21, 12, 0, 0, 18432, 122, }, /* 254 */ - { 70, 5, 12, 0, 0, 18432, 60, }, /* 255 */ - { 70, 5, 12, 0, -48, 18432, 76, }, /* 256 */ - { 70, 5, 12, 0, 0, 18432, 70, }, /* 257 */ - { 70, 21, 12, 0, 0, 18432, 124, }, /* 258 */ - { 70, 17, 12, 0, 0, 28672, 126, }, /* 259 */ - { 70, 26, 12, 0, 0, 28672, 68, }, /* 260 */ - { 70, 23, 12, 0, 0, 14336, 68, }, /* 261 */ - { 68, 2, 12, 0, 0, 34816, 0, }, /* 262 */ - { 71, 12, 3, 0, 0, 26624, 96, }, /* 263 */ - { 71, 12, 3, 0, 0, 26624, 102, }, /* 264 */ - { 71, 12, 3, 0, 0, 26624, 128, }, /* 265 */ - { 71, 17, 12, 0, 0, 34816, 126, }, /* 266 */ - { 71, 21, 12, 0, 0, 34816, 68, }, /* 267 */ - { 71, 21, 12, 0, 0, 34816, 106, }, /* 268 */ - { 71, 12, 3, 0, 0, 26624, 130, }, /* 269 */ - { 71, 7, 12, 0, 0, 34816, 82, }, /* 270 */ - { 71, 21, 12, 0, 0, 34816, 122, }, /* 271 */ - { 3, 1, 4, 0, 0, 2048, 
132, }, /* 272 */ - { 69, 1, 4, 0, 0, 2048, 132, }, /* 273 */ - { 3, 25, 12, 0, 0, 28672, 118, }, /* 274 */ - { 3, 25, 12, 0, 0, 0, 118, }, /* 275 */ - { 3, 21, 12, 0, 0, 14336, 68, }, /* 276 */ - { 3, 23, 12, 0, 0, 0, 68, }, /* 277 */ - { 69, 21, 12, 0, 0, 8342, 106, }, /* 278 */ - { 3, 21, 12, 0, 0, 0, 68, }, /* 279 */ - { 3, 26, 12, 0, 0, 28672, 68, }, /* 280 */ - { 3, 12, 3, 0, 0, 26624, 130, }, /* 281 */ - { 69, 21, 12, 0, 0, 150, 106, }, /* 282 */ - { 3, 1, 2, 0, 0, 108, 134, }, /* 283 */ - { 3, 21, 12, 0, 0, 0, 124, }, /* 284 */ - { 69, 21, 12, 0, 0, 159, 124, }, /* 285 */ - { 3, 7, 12, 0, 0, 0, 82, }, /* 286 */ - { 69, 6, 12, 0, 0, 165, 136, }, /* 287 */ - { 84, 12, 3, 0, 0, 26660, 128, }, /* 288 */ - { 84, 12, 3, 0, 0, 26660, 130, }, /* 289 */ - { 3, 12, 3, 0, 0, 26624, 128, }, /* 290 */ - { 3, 12, 3, 0, 0, 26624, 96, }, /* 291 */ - { 3, 13, 12, 0, 0, 2159, 138, }, /* 292 */ - { 3, 21, 12, 0, 0, 2048, 68, }, /* 293 */ - { 3, 7, 12, 0, 0, 0, 140, }, /* 294 */ - { 3, 21, 12, 0, 0, 30, 124, }, /* 295 */ - { 3, 6, 12, 0, 0, 0, 92, }, /* 296 */ - { 3, 13, 12, 0, 0, 10240, 138, }, /* 297 */ - { 3, 26, 12, 0, 0, 0, 68, }, /* 298 */ - { 4, 21, 12, 0, 0, 0, 124, }, /* 299 */ - { 4, 21, 12, 0, 0, 0, 106, }, /* 300 */ - { 4, 21, 12, 0, 0, 0, 68, }, /* 301 */ - { 68, 2, 12, 0, 0, 0, 0, }, /* 302 */ - { 4, 1, 4, 0, 0, 0, 132, }, /* 303 */ - { 4, 7, 12, 0, 0, 0, 82, }, /* 304 */ - { 4, 12, 3, 0, 0, 26624, 130, }, /* 305 */ - { 4, 12, 3, 0, 0, 26624, 128, }, /* 306 */ - { 4, 12, 3, 0, 0, 26624, 96, }, /* 307 */ - { 5, 7, 12, 0, 0, 0, 82, }, /* 308 */ - { 5, 12, 3, 0, 0, 26624, 128, }, /* 309 */ - { 38, 13, 12, 0, 0, 34816, 138, }, /* 310 */ - { 38, 7, 12, 0, 0, 34816, 82, }, /* 311 */ - { 38, 12, 3, 0, 0, 26624, 96, }, /* 312 */ - { 38, 6, 12, 0, 0, 34816, 92, }, /* 313 */ - { 38, 26, 12, 0, 0, 28672, 68, }, /* 314 */ - { 38, 21, 12, 0, 0, 28672, 68, }, /* 315 */ - { 38, 21, 12, 0, 0, 28672, 106, }, /* 316 */ - { 38, 21, 12, 0, 0, 28672, 124, }, /* 317 */ - { 38, 6, 12, 
0, 0, 34816, 136, }, /* 318 */ - { 38, 12, 3, 0, 0, 26624, 102, }, /* 319 */ - { 38, 23, 12, 0, 0, 34816, 68, }, /* 320 */ - { 110, 7, 12, 0, 0, 34816, 82, }, /* 321 */ - { 110, 12, 3, 0, 0, 26624, 130, }, /* 322 */ - { 110, 12, 3, 0, 0, 26624, 96, }, /* 323 */ - { 110, 6, 12, 0, 0, 34816, 142, }, /* 324 */ - { 110, 12, 3, 0, 0, 26624, 102, }, /* 325 */ - { 110, 21, 12, 0, 0, 34816, 106, }, /* 326 */ - { 110, 21, 12, 0, 0, 34816, 124, }, /* 327 */ - { 42, 7, 12, 0, 0, 34816, 82, }, /* 328 */ - { 42, 12, 3, 0, 0, 26624, 102, }, /* 329 */ - { 42, 21, 12, 0, 0, 34816, 106, }, /* 330 */ - { 3, 24, 12, 0, 0, 0, 122, }, /* 331 */ - { 3, 12, 3, 0, 0, 26624, 102, }, /* 332 */ - { 6, 12, 3, 0, 0, 26624, 130, }, /* 333 */ - { 6, 10, 5, 0, 0, 18432, 144, }, /* 334 */ - { 6, 7, 12, 0, 0, 18432, 82, }, /* 335 */ - { 6, 12, 3, 0, 0, 26624, 96, }, /* 336 */ - { 6, 12, 3, 0, 0, 26624, 146, }, /* 337 */ - { 84, 12, 3, 0, 0, 26798, 96, }, /* 338 */ - { 84, 12, 3, 0, 0, 26795, 96, }, /* 339 */ - { 69, 21, 12, 0, 0, 18615, 124, }, /* 340 */ - { 69, 21, 12, 0, 0, 18618, 124, }, /* 341 */ - { 6, 13, 12, 0, 0, 18576, 138, }, /* 342 */ - { 6, 21, 12, 0, 0, 18432, 68, }, /* 343 */ - { 6, 6, 12, 0, 0, 18432, 92, }, /* 344 */ - { 7, 7, 12, 0, 0, 18432, 82, }, /* 345 */ - { 7, 12, 3, 0, 0, 26624, 130, }, /* 346 */ - { 7, 10, 5, 0, 0, 18432, 144, }, /* 347 */ - { 7, 12, 3, 0, 0, 26624, 96, }, /* 348 */ - { 7, 10, 3, 0, 0, 18432, 148, }, /* 349 */ - { 7, 12, 3, 0, 0, 26624, 146, }, /* 350 */ - { 7, 13, 12, 0, 0, 18546, 138, }, /* 351 */ - { 7, 23, 12, 0, 0, 14336, 68, }, /* 352 */ - { 7, 15, 12, 0, 0, 18432, 68, }, /* 353 */ - { 7, 26, 12, 0, 0, 18432, 68, }, /* 354 */ - { 7, 21, 12, 0, 0, 18432, 68, }, /* 355 */ - { 7, 12, 3, 0, 0, 26624, 102, }, /* 356 */ - { 8, 12, 3, 0, 0, 26624, 130, }, /* 357 */ - { 8, 10, 5, 0, 0, 18432, 144, }, /* 358 */ - { 8, 7, 12, 0, 0, 18432, 82, }, /* 359 */ - { 8, 12, 3, 0, 0, 26624, 96, }, /* 360 */ - { 8, 12, 3, 0, 0, 26624, 146, }, /* 361 */ - { 8, 13, 12, 0, 
0, 18519, 138, }, /* 362 */ - { 8, 21, 12, 0, 0, 18432, 68, }, /* 363 */ - { 9, 12, 3, 0, 0, 26624, 130, }, /* 364 */ - { 9, 10, 5, 0, 0, 18432, 144, }, /* 365 */ - { 9, 7, 12, 0, 0, 18432, 82, }, /* 366 */ - { 9, 12, 3, 0, 0, 26624, 96, }, /* 367 */ - { 9, 12, 3, 0, 0, 26624, 146, }, /* 368 */ - { 9, 13, 12, 0, 0, 18516, 138, }, /* 369 */ - { 9, 21, 12, 0, 0, 18432, 68, }, /* 370 */ - { 9, 23, 12, 0, 0, 14336, 68, }, /* 371 */ - { 10, 12, 3, 0, 0, 26624, 130, }, /* 372 */ - { 10, 10, 5, 0, 0, 18432, 144, }, /* 373 */ - { 10, 7, 12, 0, 0, 18432, 82, }, /* 374 */ - { 10, 12, 3, 0, 0, 26624, 96, }, /* 375 */ - { 10, 10, 3, 0, 0, 18432, 148, }, /* 376 */ - { 10, 12, 3, 0, 0, 26624, 146, }, /* 377 */ - { 10, 12, 3, 0, 0, 26624, 150, }, /* 378 */ - { 10, 13, 12, 0, 0, 18432, 138, }, /* 379 */ - { 10, 26, 12, 0, 0, 18432, 68, }, /* 380 */ - { 10, 15, 12, 0, 0, 18432, 68, }, /* 381 */ - { 11, 12, 3, 0, 0, 26624, 130, }, /* 382 */ - { 11, 7, 12, 0, 0, 18432, 82, }, /* 383 */ - { 11, 10, 3, 0, 0, 18432, 148, }, /* 384 */ - { 11, 10, 5, 0, 0, 18432, 144, }, /* 385 */ - { 11, 12, 3, 0, 0, 26624, 146, }, /* 386 */ - { 11, 13, 12, 0, 0, 18513, 138, }, /* 387 */ - { 11, 15, 12, 0, 0, 18513, 68, }, /* 388 */ - { 11, 26, 12, 0, 0, 28753, 68, }, /* 389 */ - { 11, 26, 12, 0, 0, 28672, 68, }, /* 390 */ - { 11, 23, 12, 0, 0, 14336, 68, }, /* 391 */ - { 12, 12, 3, 0, 0, 26624, 130, }, /* 392 */ - { 12, 10, 5, 0, 0, 18432, 144, }, /* 393 */ - { 12, 7, 12, 0, 0, 18432, 82, }, /* 394 */ - { 12, 12, 3, 0, 0, 26624, 96, }, /* 395 */ - { 12, 12, 3, 0, 0, 26624, 146, }, /* 396 */ - { 12, 13, 12, 0, 0, 18432, 138, }, /* 397 */ - { 12, 21, 12, 0, 0, 18432, 68, }, /* 398 */ - { 12, 15, 12, 0, 0, 28672, 68, }, /* 399 */ - { 12, 26, 12, 0, 0, 18432, 68, }, /* 400 */ - { 13, 7, 12, 0, 0, 18432, 82, }, /* 401 */ - { 13, 12, 3, 0, 0, 26624, 130, }, /* 402 */ - { 13, 10, 5, 0, 0, 18432, 144, }, /* 403 */ - { 13, 21, 12, 0, 0, 18432, 68, }, /* 404 */ - { 13, 12, 3, 0, 0, 26624, 96, }, /* 405 */ - { 13, 
12, 3, 0, 0, 18432, 130, }, /* 406 */ - { 13, 10, 3, 0, 0, 18432, 148, }, /* 407 */ - { 13, 12, 3, 0, 0, 26624, 146, }, /* 408 */ - { 13, 13, 12, 0, 0, 18528, 138, }, /* 409 */ - { 14, 12, 3, 0, 0, 26624, 130, }, /* 410 */ - { 14, 10, 5, 0, 0, 18432, 144, }, /* 411 */ - { 14, 7, 12, 0, 0, 18432, 82, }, /* 412 */ - { 14, 12, 3, 0, 0, 26624, 146, }, /* 413 */ - { 14, 10, 3, 0, 0, 18432, 148, }, /* 414 */ - { 14, 7, 4, 0, 0, 18432, 82, }, /* 415 */ - { 14, 26, 12, 0, 0, 18432, 68, }, /* 416 */ - { 14, 15, 12, 0, 0, 18432, 68, }, /* 417 */ - { 14, 13, 12, 0, 0, 18432, 138, }, /* 418 */ - { 15, 12, 3, 0, 0, 26624, 130, }, /* 419 */ - { 15, 10, 5, 0, 0, 18432, 144, }, /* 420 */ - { 15, 7, 12, 0, 0, 18432, 82, }, /* 421 */ - { 15, 12, 3, 0, 0, 26624, 146, }, /* 422 */ - { 15, 10, 3, 0, 0, 18432, 148, }, /* 423 */ - { 15, 13, 12, 0, 0, 18432, 138, }, /* 424 */ - { 15, 21, 12, 0, 0, 18432, 68, }, /* 425 */ - { 72, 7, 12, 0, 0, 18432, 82, }, /* 426 */ - { 72, 12, 3, 0, 0, 26624, 130, }, /* 427 */ - { 72, 7, 5, 0, 0, 18432, 152, }, /* 428 */ - { 72, 12, 3, 0, 0, 26624, 154, }, /* 429 */ - { 69, 23, 12, 0, 0, 14336, 68, }, /* 430 */ - { 72, 7, 12, 0, 0, 18432, 156, }, /* 431 */ - { 72, 6, 12, 0, 0, 18432, 136, }, /* 432 */ - { 72, 12, 3, 0, 0, 26624, 96, }, /* 433 */ - { 72, 21, 12, 0, 0, 18432, 68, }, /* 434 */ - { 72, 13, 12, 0, 0, 18432, 138, }, /* 435 */ - { 72, 21, 12, 0, 0, 18432, 106, }, /* 436 */ - { 73, 7, 12, 0, 0, 18432, 82, }, /* 437 */ - { 73, 12, 3, 0, 0, 26624, 130, }, /* 438 */ - { 73, 7, 5, 0, 0, 18432, 152, }, /* 439 */ - { 73, 12, 3, 0, 0, 26624, 146, }, /* 440 */ - { 73, 7, 12, 0, 0, 18432, 156, }, /* 441 */ - { 73, 6, 12, 0, 0, 18432, 136, }, /* 442 */ - { 73, 12, 3, 0, 0, 26624, 96, }, /* 443 */ - { 73, 12, 3, 0, 0, 26624, 102, }, /* 444 */ - { 73, 13, 12, 0, 0, 18432, 138, }, /* 445 */ - { 74, 7, 12, 0, 0, 18432, 82, }, /* 446 */ - { 74, 26, 12, 0, 0, 18432, 68, }, /* 447 */ - { 74, 21, 12, 0, 0, 18432, 68, }, /* 448 */ - { 74, 21, 12, 0, 0, 18432, 106, 
}, /* 449 */ - { 74, 12, 3, 0, 0, 26624, 96, }, /* 450 */ - { 74, 13, 12, 0, 0, 18432, 138, }, /* 451 */ - { 74, 15, 12, 0, 0, 18432, 68, }, /* 452 */ - { 74, 22, 12, 0, 0, 28672, 158, }, /* 453 */ - { 74, 18, 12, 0, 0, 28672, 158, }, /* 454 */ - { 74, 10, 5, 0, 0, 18432, 160, }, /* 455 */ - { 74, 12, 3, 0, 0, 26624, 130, }, /* 456 */ - { 74, 12, 3, 0, 0, 26624, 162, }, /* 457 */ - { 74, 10, 5, 0, 0, 18432, 144, }, /* 458 */ - { 74, 12, 3, 0, 0, 26624, 128, }, /* 459 */ - { 74, 12, 3, 0, 0, 26624, 146, }, /* 460 */ - { 69, 26, 12, 0, 0, 18432, 68, }, /* 461 */ - { 16, 7, 12, 0, 0, 18432, 82, }, /* 462 */ - { 16, 10, 12, 0, 0, 18432, 144, }, /* 463 */ - { 16, 12, 3, 0, 0, 26624, 130, }, /* 464 */ - { 16, 10, 5, 0, 0, 18432, 144, }, /* 465 */ - { 16, 12, 3, 0, 0, 26624, 96, }, /* 466 */ - { 16, 12, 3, 0, 0, 26624, 146, }, /* 467 */ - { 16, 13, 12, 0, 0, 18549, 138, }, /* 468 */ - { 16, 21, 12, 0, 0, 18432, 124, }, /* 469 */ - { 16, 21, 12, 0, 0, 18432, 68, }, /* 470 */ - { 16, 10, 12, 0, 0, 18432, 164, }, /* 471 */ - { 16, 12, 3, 0, 0, 26624, 128, }, /* 472 */ - { 16, 13, 12, 0, 0, 18432, 138, }, /* 473 */ - { 16, 26, 12, 0, 0, 18432, 68, }, /* 474 */ - { 17, 9, 12, 0, 7264, 18432, 74, }, /* 475 */ - { 17, 5, 12, 0, 3008, 18432, 166, }, /* 476 */ - { 69, 21, 12, 0, 0, 18510, 68, }, /* 477 */ - { 17, 6, 12, 0, 0, 18432, 168, }, /* 478 */ - { 18, 7, 6, 0, 0, 18432, 82, }, /* 479 */ - { 18, 7, 6, 0, 0, 18432, 170, }, /* 480 */ - { 18, 7, 7, 0, 0, 18432, 170, }, /* 481 */ - { 18, 7, 7, 0, 0, 18432, 82, }, /* 482 */ - { 18, 7, 8, 0, 0, 18432, 82, }, /* 483 */ - { 75, 7, 12, 0, 0, 18432, 82, }, /* 484 */ - { 75, 12, 3, 0, 0, 26624, 96, }, /* 485 */ - { 75, 21, 12, 0, 0, 18432, 68, }, /* 486 */ - { 75, 21, 12, 0, 0, 18432, 106, }, /* 487 */ - { 75, 21, 12, 0, 0, 18432, 124, }, /* 488 */ - { 75, 15, 12, 0, 0, 18432, 138, }, /* 489 */ - { 75, 15, 12, 0, 0, 18432, 68, }, /* 490 */ - { 75, 26, 12, 0, 0, 28672, 68, }, /* 491 */ - { 76, 9, 12, 0, 38864, 18432, 172, }, /* 492 */ - 
{ 76, 9, 12, 0, 8, 18432, 172, }, /* 493 */ - { 76, 5, 12, 0, -8, 18432, 70, }, /* 494 */ - { 77, 17, 12, 0, 0, 28672, 126, }, /* 495 */ - { 77, 7, 12, 0, 0, 18432, 82, }, /* 496 */ - { 77, 26, 12, 0, 0, 18432, 68, }, /* 497 */ - { 77, 21, 12, 0, 0, 18432, 124, }, /* 498 */ - { 78, 29, 12, 0, 0, 45056, 52, }, /* 499 */ - { 78, 7, 12, 0, 0, 18432, 82, }, /* 500 */ - { 78, 22, 12, 0, 0, 28672, 158, }, /* 501 */ - { 78, 18, 12, 0, 0, 28672, 158, }, /* 502 */ - { 79, 7, 12, 0, 0, 18432, 82, }, /* 503 */ - { 69, 21, 12, 0, 0, 18432, 106, }, /* 504 */ - { 79, 14, 12, 0, 0, 18432, 82, }, /* 505 */ - { 25, 7, 12, 0, 0, 18432, 82, }, /* 506 */ - { 25, 12, 3, 0, 0, 26624, 130, }, /* 507 */ - { 25, 12, 3, 0, 0, 26624, 146, }, /* 508 */ - { 25, 10, 5, 0, 0, 18432, 174, }, /* 509 */ - { 26, 7, 12, 0, 0, 18432, 82, }, /* 510 */ - { 26, 12, 3, 0, 0, 26624, 130, }, /* 511 */ - { 26, 10, 5, 0, 0, 18432, 176, }, /* 512 */ - { 69, 21, 12, 0, 0, 18573, 124, }, /* 513 */ - { 27, 7, 12, 0, 0, 18432, 82, }, /* 514 */ - { 27, 12, 3, 0, 0, 26624, 130, }, /* 515 */ - { 28, 7, 12, 0, 0, 18432, 82, }, /* 516 */ - { 28, 12, 3, 0, 0, 26624, 130, }, /* 517 */ - { 80, 7, 12, 0, 0, 18432, 82, }, /* 518 */ - { 80, 7, 12, 0, 0, 18432, 140, }, /* 519 */ - { 80, 12, 3, 0, 0, 26624, 100, }, /* 520 */ - { 80, 10, 5, 0, 0, 18432, 144, }, /* 521 */ - { 80, 12, 3, 0, 0, 26624, 130, }, /* 522 */ - { 80, 12, 3, 0, 0, 26624, 96, }, /* 523 */ - { 80, 12, 3, 0, 0, 26624, 146, }, /* 524 */ - { 80, 21, 12, 0, 0, 18432, 106, }, /* 525 */ - { 80, 6, 12, 0, 0, 18432, 142, }, /* 526 */ - { 80, 21, 12, 0, 0, 18432, 68, }, /* 527 */ - { 80, 23, 12, 0, 0, 14336, 68, }, /* 528 */ - { 80, 13, 12, 0, 0, 18432, 138, }, /* 529 */ - { 80, 15, 12, 0, 0, 28672, 68, }, /* 530 */ - { 19, 21, 12, 0, 0, 28672, 68, }, /* 531 */ - { 69, 21, 12, 0, 0, 28777, 106, }, /* 532 */ - { 69, 21, 12, 0, 0, 28777, 124, }, /* 533 */ - { 19, 21, 12, 0, 0, 28672, 106, }, /* 534 */ - { 19, 17, 12, 0, 0, 28672, 126, }, /* 535 */ - { 19, 21, 12, 0, 
0, 28672, 124, }, /* 536 */ - { 19, 21, 12, 0, 0, 28672, 178, }, /* 537 */ - { 19, 12, 3, 0, 0, 26624, 180, }, /* 538 */ - { 19, 1, 2, 0, 0, 6144, 66, }, /* 539 */ - { 19, 13, 12, 0, 0, 18432, 138, }, /* 540 */ - { 19, 7, 12, 0, 0, 18432, 82, }, /* 541 */ - { 19, 6, 12, 0, 0, 18432, 136, }, /* 542 */ - { 19, 12, 3, 0, 0, 26624, 182, }, /* 543 */ - { 19, 12, 3, 0, 0, 26624, 130, }, /* 544 */ - { 29, 7, 12, 0, 0, 18432, 82, }, /* 545 */ - { 29, 12, 3, 0, 0, 26624, 130, }, /* 546 */ - { 29, 10, 5, 0, 0, 18432, 144, }, /* 547 */ - { 29, 12, 3, 0, 0, 26624, 96, }, /* 548 */ - { 29, 26, 12, 0, 0, 28672, 68, }, /* 549 */ - { 29, 21, 12, 0, 0, 28672, 124, }, /* 550 */ - { 29, 13, 12, 0, 0, 18432, 138, }, /* 551 */ - { 30, 7, 12, 0, 0, 18432, 82, }, /* 552 */ - { 89, 7, 12, 0, 0, 18432, 82, }, /* 553 */ - { 89, 7, 12, 0, 0, 18432, 156, }, /* 554 */ - { 89, 13, 12, 0, 0, 18432, 138, }, /* 555 */ - { 89, 15, 12, 0, 0, 18432, 138, }, /* 556 */ - { 89, 26, 12, 0, 0, 28672, 68, }, /* 557 */ - { 80, 26, 12, 0, 0, 28672, 68, }, /* 558 */ - { 33, 7, 12, 0, 0, 18432, 82, }, /* 559 */ - { 33, 12, 3, 0, 0, 26624, 130, }, /* 560 */ - { 33, 10, 5, 0, 0, 18432, 144, }, /* 561 */ - { 33, 21, 12, 0, 0, 18432, 68, }, /* 562 */ - { 106, 7, 12, 0, 0, 18432, 82, }, /* 563 */ - { 106, 10, 5, 0, 0, 18432, 144, }, /* 564 */ - { 106, 12, 3, 0, 0, 26624, 130, }, /* 565 */ - { 106, 12, 3, 0, 0, 26624, 184, }, /* 566 */ - { 106, 10, 12, 0, 0, 18432, 144, }, /* 567 */ - { 106, 12, 3, 0, 0, 26624, 96, }, /* 568 */ - { 106, 13, 12, 0, 0, 18432, 138, }, /* 569 */ - { 106, 21, 12, 0, 0, 18432, 68, }, /* 570 */ - { 106, 6, 12, 0, 0, 18432, 136, }, /* 571 */ - { 106, 21, 12, 0, 0, 18432, 124, }, /* 572 */ - { 84, 11, 3, 0, 0, 26624, 186, }, /* 573 */ - { 84, 12, 3, 0, 0, 26624, 130, }, /* 574 */ - { 93, 12, 3, 0, 0, 26624, 130, }, /* 575 */ - { 93, 10, 5, 0, 0, 18432, 144, }, /* 576 */ - { 93, 7, 12, 0, 0, 18432, 82, }, /* 577 */ - { 93, 12, 3, 0, 0, 26624, 96, }, /* 578 */ - { 93, 10, 3, 0, 0, 18432, 148, 
}, /* 579 */ - { 93, 10, 5, 0, 0, 18432, 174, }, /* 580 */ - { 93, 13, 12, 0, 0, 18432, 138, }, /* 581 */ - { 93, 21, 12, 0, 0, 18432, 124, }, /* 582 */ - { 93, 21, 12, 0, 0, 18432, 68, }, /* 583 */ - { 93, 21, 12, 0, 0, 18432, 106, }, /* 584 */ - { 93, 26, 12, 0, 0, 18432, 68, }, /* 585 */ - { 96, 12, 3, 0, 0, 26624, 130, }, /* 586 */ - { 96, 10, 5, 0, 0, 18432, 144, }, /* 587 */ - { 96, 7, 12, 0, 0, 18432, 82, }, /* 588 */ - { 96, 10, 5, 0, 0, 18432, 174, }, /* 589 */ - { 96, 12, 3, 0, 0, 26624, 146, }, /* 590 */ - { 96, 13, 12, 0, 0, 18432, 138, }, /* 591 */ - { 119, 7, 12, 0, 0, 18432, 82, }, /* 592 */ - { 119, 12, 3, 0, 0, 26624, 102, }, /* 593 */ - { 119, 10, 5, 0, 0, 18432, 144, }, /* 594 */ - { 119, 12, 3, 0, 0, 26624, 130, }, /* 595 */ - { 119, 10, 5, 0, 0, 18432, 176, }, /* 596 */ - { 119, 21, 12, 0, 0, 18432, 68, }, /* 597 */ - { 97, 7, 12, 0, 0, 18432, 82, }, /* 598 */ - { 97, 10, 5, 0, 0, 18432, 144, }, /* 599 */ - { 97, 12, 3, 0, 0, 26624, 130, }, /* 600 */ - { 97, 12, 3, 0, 0, 26624, 188, }, /* 601 */ - { 97, 12, 3, 0, 0, 26624, 96, }, /* 602 */ - { 97, 21, 12, 0, 0, 18432, 124, }, /* 603 */ - { 97, 21, 12, 0, 0, 18432, 106, }, /* 604 */ - { 97, 13, 12, 0, 0, 18432, 138, }, /* 605 */ - { 98, 13, 12, 0, 0, 18432, 138, }, /* 606 */ - { 98, 7, 12, 0, 0, 18432, 82, }, /* 607 */ - { 98, 6, 12, 0, 0, 18432, 92, }, /* 608 */ - { 98, 6, 12, 0, 0, 18432, 94, }, /* 609 */ - { 98, 21, 12, 0, 0, 18432, 124, }, /* 610 */ - { 2, 5, 12, 63, -6222, 18432, 70, }, /* 611 */ - { 2, 5, 12, 67, -6221, 18432, 70, }, /* 612 */ - { 2, 5, 12, 71, -6212, 18432, 70, }, /* 613 */ - { 2, 5, 12, 75, -6210, 18432, 70, }, /* 614 */ - { 2, 5, 12, 79, -6210, 18432, 70, }, /* 615 */ - { 2, 5, 12, 79, -6211, 18432, 70, }, /* 616 */ - { 2, 5, 12, 84, -6204, 18432, 70, }, /* 617 */ - { 2, 5, 12, 88, -6180, 18432, 70, }, /* 618 */ - { 2, 5, 12, 108, 35267, 18432, 70, }, /* 619 */ - { 17, 9, 12, 0, -3008, 18432, 74, }, /* 620 */ - { 96, 21, 12, 0, 0, 18432, 68, }, /* 621 */ - { 84, 12, 3, 
0, 0, 26762, 96, }, /* 622 */ - { 84, 12, 3, 0, 0, 26630, 96, }, /* 623 */ - { 69, 21, 12, 0, 0, 18498, 190, }, /* 624 */ - { 84, 12, 3, 0, 0, 26666, 96, }, /* 625 */ - { 84, 12, 3, 0, 0, 26696, 96, }, /* 626 */ - { 84, 12, 3, 0, 0, 26780, 96, }, /* 627 */ - { 69, 10, 5, 0, 0, 18474, 160, }, /* 628 */ - { 69, 7, 12, 0, 0, 18501, 82, }, /* 629 */ - { 69, 7, 12, 0, 0, 18474, 82, }, /* 630 */ - { 69, 7, 12, 0, 0, 18438, 82, }, /* 631 */ - { 69, 7, 12, 0, 0, 18594, 82, }, /* 632 */ - { 69, 7, 12, 0, 0, 18498, 82, }, /* 633 */ - { 84, 12, 3, 0, 0, 26750, 96, }, /* 634 */ - { 69, 10, 5, 0, 0, 18435, 160, }, /* 635 */ - { 84, 12, 3, 0, 0, 26690, 96, }, /* 636 */ - { 69, 7, 12, 0, 0, 18453, 82, }, /* 637 */ - { 2, 5, 12, 0, 0, 18432, 60, }, /* 638 */ - { 1, 6, 12, 0, 0, 18432, 88, }, /* 639 */ - { 2, 6, 12, 0, 0, 18432, 168, }, /* 640 */ - { 0, 5, 12, 0, 35332, 18432, 76, }, /* 641 */ - { 0, 5, 12, 0, 3814, 18432, 76, }, /* 642 */ - { 0, 5, 12, 0, 35384, 18432, 76, }, /* 643 */ - { 0, 5, 12, 0, 0, 18432, 192, }, /* 644 */ - { 0, 6, 12, 0, 0, 18432, 168, }, /* 645 */ - { 0, 6, 12, 0, 0, 18432, 194, }, /* 646 */ - { 1, 6, 12, 0, 0, 18432, 168, }, /* 647 */ - { 84, 12, 3, 0, 0, 26636, 102, }, /* 648 */ - { 84, 12, 3, 0, 0, 26687, 96, }, /* 649 */ - { 84, 12, 3, 0, 0, 26648, 96, }, /* 650 */ - { 0, 9, 12, 92, 1, 18432, 74, }, /* 651 */ - { 0, 5, 12, 92, -1, 18432, 76, }, /* 652 */ - { 0, 5, 12, 0, 0, 18432, 70, }, /* 653 */ - { 0, 5, 12, 92, -58, 18432, 70, }, /* 654 */ - { 0, 9, 12, 0, -7615, 18432, 74, }, /* 655 */ - { 1, 5, 12, 0, 8, 18432, 76, }, /* 656 */ - { 1, 9, 12, 0, -8, 18432, 74, }, /* 657 */ - { 1, 5, 12, 0, 74, 18432, 76, }, /* 658 */ - { 1, 5, 12, 0, 86, 18432, 76, }, /* 659 */ - { 1, 5, 12, 0, 100, 18432, 76, }, /* 660 */ - { 1, 5, 12, 0, 128, 18432, 76, }, /* 661 */ - { 1, 5, 12, 0, 112, 18432, 76, }, /* 662 */ - { 1, 5, 12, 0, 126, 18432, 76, }, /* 663 */ - { 1, 5, 12, 0, 8, 18432, 70, }, /* 664 */ - { 1, 8, 12, 0, -8, 18432, 86, }, /* 665 */ - { 1, 5, 12, 0, 
0, 18432, 70, }, /* 666 */ - { 1, 5, 12, 0, 9, 18432, 70, }, /* 667 */ - { 1, 9, 12, 0, -74, 18432, 74, }, /* 668 */ - { 1, 8, 12, 0, -9, 18432, 86, }, /* 669 */ - { 1, 5, 12, 21, -7173, 18432, 76, }, /* 670 */ - { 1, 9, 12, 0, -86, 18432, 74, }, /* 671 */ - { 1, 9, 12, 0, -100, 18432, 74, }, /* 672 */ - { 1, 9, 12, 0, -112, 18432, 74, }, /* 673 */ - { 1, 9, 12, 0, -128, 18432, 74, }, /* 674 */ - { 1, 9, 12, 0, -126, 18432, 74, }, /* 675 */ - { 69, 29, 12, 0, 0, 45056, 52, }, /* 676 */ - { 84, 1, 3, 0, 0, 6144, 196, }, /* 677 */ - { 84, 1, 13, 0, 0, 6144, 198, }, /* 678 */ - { 69, 1, 2, 0, 0, 18432, 200, }, /* 679 */ - { 69, 1, 2, 0, 0, 34816, 200, }, /* 680 */ - { 69, 17, 12, 0, 0, 28672, 202, }, /* 681 */ - { 69, 21, 12, 0, 0, 28672, 64, }, /* 682 */ - { 69, 20, 12, 0, 0, 28672, 204, }, /* 683 */ - { 69, 19, 12, 0, 0, 28672, 204, }, /* 684 */ - { 69, 22, 12, 0, 0, 28672, 206, }, /* 685 */ - { 69, 20, 12, 0, 0, 28672, 206, }, /* 686 */ - { 69, 19, 12, 0, 0, 28672, 206, }, /* 687 */ - { 69, 21, 12, 0, 0, 28672, 208, }, /* 688 */ - { 69, 27, 2, 0, 0, 45056, 50, }, /* 689 */ - { 69, 28, 2, 0, 0, 4096, 50, }, /* 690 */ - { 69, 1, 2, 0, 0, 20480, 134, }, /* 691 */ - { 69, 1, 2, 0, 0, 36864, 134, }, /* 692 */ - { 69, 1, 2, 0, 0, 30720, 134, }, /* 693 */ - { 69, 1, 2, 0, 0, 24576, 134, }, /* 694 */ - { 69, 1, 2, 0, 0, 40960, 134, }, /* 695 */ - { 69, 29, 12, 0, 0, 8291, 52, }, /* 696 */ - { 69, 21, 12, 0, 0, 14336, 54, }, /* 697 */ - { 69, 21, 12, 0, 0, 14336, 64, }, /* 698 */ - { 69, 21, 14, 0, 0, 28672, 210, }, /* 699 */ - { 69, 21, 12, 0, 0, 28672, 212, }, /* 700 */ - { 69, 16, 12, 0, 0, 28672, 138, }, /* 701 */ - { 69, 16, 12, 0, 0, 28672, 214, }, /* 702 */ - { 69, 25, 12, 0, 0, 8192, 64, }, /* 703 */ - { 69, 22, 12, 0, 0, 28672, 216, }, /* 704 */ - { 69, 18, 12, 0, 0, 28672, 216, }, /* 705 */ - { 69, 21, 12, 0, 0, 28672, 202, }, /* 706 */ - { 69, 1, 2, 0, 0, 6144, 218, }, /* 707 */ - { 68, 2, 2, 0, 0, 6144, 220, }, /* 708 */ - { 69, 1, 2, 0, 0, 22528, 134, }, /* 709 
*/ - { 69, 1, 2, 0, 0, 38912, 134, }, /* 710 */ - { 69, 1, 2, 0, 0, 16384, 134, }, /* 711 */ - { 69, 1, 2, 0, 0, 32768, 134, }, /* 712 */ - { 69, 1, 2, 0, 0, 6144, 222, }, /* 713 */ - { 69, 25, 12, 0, 0, 12288, 118, }, /* 714 */ - { 69, 25, 12, 0, 0, 12288, 224, }, /* 715 */ - { 69, 25, 12, 0, 0, 28672, 118, }, /* 716 */ - { 69, 22, 12, 0, 0, 28672, 226, }, /* 717 */ - { 69, 18, 12, 0, 0, 28672, 226, }, /* 718 */ - { 68, 2, 12, 0, 0, 14336, 0, }, /* 719 */ - { 84, 12, 3, 0, 0, 26624, 228, }, /* 720 */ - { 84, 11, 3, 0, 0, 26624, 120, }, /* 721 */ - { 84, 11, 3, 0, 0, 26624, 230, }, /* 722 */ - { 84, 12, 3, 0, 0, 26753, 102, }, /* 723 */ - { 69, 26, 12, 0, 0, 28672, 68, }, /* 724 */ - { 69, 9, 12, 0, 0, 18432, 112, }, /* 725 */ - { 69, 5, 12, 0, 0, 18432, 232, }, /* 726 */ - { 69, 25, 12, 0, 0, 28672, 234, }, /* 727 */ - { 69, 26, 14, 0, 0, 28672, 236, }, /* 728 */ - { 1, 9, 12, 96, -7517, 18432, 74, }, /* 729 */ - { 69, 26, 12, 0, 0, 28672, 118, }, /* 730 */ - { 0, 9, 12, 100, 0, 18432, 74, }, /* 731 */ - { 0, 9, 12, 104, -8262, 18432, 74, }, /* 732 */ - { 69, 26, 12, 0, 0, 14336, 238, }, /* 733 */ - { 0, 9, 12, 0, 28, 18432, 74, }, /* 734 */ - { 69, 7, 12, 0, 0, 18432, 240, }, /* 735 */ - { 69, 5, 14, 0, 0, 18432, 242, }, /* 736 */ - { 69, 5, 12, 0, 0, 18432, 244, }, /* 737 */ - { 0, 5, 12, 0, -28, 18432, 76, }, /* 738 */ - { 0, 14, 12, 0, 16, 18432, 74, }, /* 739 */ - { 0, 14, 12, 0, -16, 18432, 76, }, /* 740 */ - { 0, 14, 12, 0, 0, 18432, 82, }, /* 741 */ - { 69, 25, 14, 0, 0, 28672, 246, }, /* 742 */ - { 69, 26, 14, 0, 0, 28672, 246, }, /* 743 */ - { 69, 26, 12, 0, 0, 28672, 64, }, /* 744 */ - { 69, 25, 12, 0, 0, 28672, 248, }, /* 745 */ - { 69, 25, 12, 0, 0, 12288, 250, }, /* 746 */ - { 69, 22, 12, 0, 0, 28672, 248, }, /* 747 */ - { 69, 18, 12, 0, 0, 28672, 248, }, /* 748 */ - { 69, 26, 14, 0, 0, 28672, 252, }, /* 749 */ - { 69, 22, 12, 0, 0, 28672, 254, }, /* 750 */ - { 69, 18, 12, 0, 0, 28672, 254, }, /* 751 */ - { 69, 26, 12, 0, 0, 18432, 54, }, /* 752 */ - 
{ 69, 26, 14, 0, 0, 28672, 256, }, /* 753 */ - { 68, 2, 12, 0, 0, 18432, 258, }, /* 754 */ - { 69, 26, 12, 0, 26, 18432, 260, }, /* 755 */ - { 69, 26, 14, 0, 26, 18432, 262, }, /* 756 */ - { 69, 26, 12, 0, -26, 18432, 264, }, /* 757 */ - { 69, 25, 14, 0, 0, 28672, 266, }, /* 758 */ - { 69, 26, 14, 0, 0, 28672, 268, }, /* 759 */ - { 69, 26, 14, 0, 0, 28672, 270, }, /* 760 */ - { 69, 25, 14, 0, 0, 28672, 268, }, /* 761 */ - { 69, 26, 14, 0, 0, 18432, 256, }, /* 762 */ - { 69, 26, 14, 0, 0, 28672, 272, }, /* 763 */ - { 88, 26, 12, 0, 0, 18432, 54, }, /* 764 */ - { 69, 26, 12, 0, 0, 28672, 216, }, /* 765 */ - { 35, 9, 12, 0, 48, 18432, 74, }, /* 766 */ - { 35, 5, 12, 0, -48, 18432, 76, }, /* 767 */ - { 0, 9, 12, 0, -10743, 18432, 74, }, /* 768 */ - { 0, 9, 12, 0, -3814, 18432, 74, }, /* 769 */ - { 0, 9, 12, 0, -10727, 18432, 74, }, /* 770 */ - { 0, 5, 12, 0, -10795, 18432, 76, }, /* 771 */ - { 0, 5, 12, 0, -10792, 18432, 76, }, /* 772 */ - { 0, 9, 12, 0, -10780, 18432, 74, }, /* 773 */ - { 0, 9, 12, 0, -10749, 18432, 74, }, /* 774 */ - { 0, 9, 12, 0, -10783, 18432, 74, }, /* 775 */ - { 0, 9, 12, 0, -10782, 18432, 74, }, /* 776 */ - { 0, 9, 12, 0, -10815, 18432, 74, }, /* 777 */ - { 34, 5, 12, 0, 0, 18432, 60, }, /* 778 */ - { 34, 26, 12, 0, 0, 28672, 68, }, /* 779 */ - { 34, 12, 3, 0, 0, 26624, 96, }, /* 780 */ - { 34, 21, 12, 0, 0, 28672, 68, }, /* 781 */ - { 34, 15, 12, 0, 0, 28672, 68, }, /* 782 */ - { 17, 5, 12, 0, -7264, 18432, 76, }, /* 783 */ - { 90, 7, 12, 0, 0, 18432, 82, }, /* 784 */ - { 90, 6, 12, 0, 0, 18432, 142, }, /* 785 */ - { 90, 21, 12, 0, 0, 18432, 68, }, /* 786 */ - { 90, 12, 3, 0, 0, 26624, 184, }, /* 787 */ - { 2, 12, 3, 0, 0, 26624, 130, }, /* 788 */ - { 69, 20, 12, 0, 0, 28672, 216, }, /* 789 */ - { 69, 19, 12, 0, 0, 28672, 216, }, /* 790 */ - { 69, 6, 12, 0, 0, 28672, 274, }, /* 791 */ - { 69, 21, 12, 0, 0, 28672, 276, }, /* 792 */ - { 69, 21, 12, 0, 0, 28726, 54, }, /* 793 */ - { 23, 26, 12, 0, 0, 28672, 278, }, /* 794 */ - { 69, 26, 12, 0, 0, 
28672, 280, }, /* 795 */ - { 69, 26, 12, 0, 0, 28672, 282, }, /* 796 */ - { 69, 21, 12, 0, 0, 28825, 276, }, /* 797 */ - { 69, 21, 12, 0, 0, 28825, 212, }, /* 798 */ - { 69, 21, 12, 0, 0, 28819, 54, }, /* 799 */ - { 23, 6, 12, 0, 0, 18432, 136, }, /* 800 */ - { 69, 7, 12, 0, 0, 18447, 284, }, /* 801 */ - { 23, 14, 12, 0, 0, 18432, 284, }, /* 802 */ - { 69, 22, 12, 0, 0, 28825, 216, }, /* 803 */ - { 69, 18, 12, 0, 0, 28825, 216, }, /* 804 */ - { 69, 22, 12, 0, 0, 28825, 62, }, /* 805 */ - { 69, 18, 12, 0, 0, 28825, 62, }, /* 806 */ - { 69, 26, 12, 0, 0, 28819, 54, }, /* 807 */ - { 69, 17, 12, 0, 0, 28819, 202, }, /* 808 */ - { 69, 22, 12, 0, 0, 28819, 206, }, /* 809 */ - { 69, 18, 12, 0, 0, 28819, 206, }, /* 810 */ - { 84, 12, 3, 0, 0, 26669, 96, }, /* 811 */ - { 18, 10, 3, 0, 0, 18432, 286, }, /* 812 */ - { 69, 17, 14, 0, 0, 28819, 288, }, /* 813 */ - { 69, 6, 12, 0, 0, 18525, 136, }, /* 814 */ - { 69, 26, 12, 0, 0, 28819, 68, }, /* 815 */ - { 23, 6, 12, 0, 0, 18432, 142, }, /* 816 */ - { 69, 7, 12, 0, 0, 18564, 82, }, /* 817 */ - { 69, 21, 14, 0, 0, 28804, 236, }, /* 818 */ - { 69, 26, 12, 0, 0, 28687, 68, }, /* 819 */ - { 20, 7, 12, 0, 0, 18432, 82, }, /* 820 */ - { 84, 12, 3, 0, 0, 26717, 96, }, /* 821 */ - { 69, 24, 12, 0, 0, 28765, 290, }, /* 822 */ - { 20, 6, 12, 0, 0, 18432, 136, }, /* 823 */ - { 69, 17, 12, 0, 0, 28765, 126, }, /* 824 */ - { 21, 7, 12, 0, 0, 18432, 82, }, /* 825 */ - { 69, 21, 12, 0, 0, 28825, 68, }, /* 826 */ - { 69, 6, 12, 0, 0, 18525, 94, }, /* 827 */ - { 21, 6, 12, 0, 0, 18432, 136, }, /* 828 */ - { 22, 7, 12, 0, 0, 18432, 82, }, /* 829 */ - { 18, 7, 12, 0, 0, 18432, 82, }, /* 830 */ - { 18, 7, 12, 0, 0, 18432, 170, }, /* 831 */ - { 69, 26, 12, 0, 0, 18447, 68, }, /* 832 */ - { 69, 15, 12, 0, 0, 18447, 68, }, /* 833 */ - { 18, 26, 12, 0, 0, 18432, 68, }, /* 834 */ - { 18, 26, 12, 0, 0, 28672, 68, }, /* 835 */ - { 69, 15, 12, 0, 0, 18432, 68, }, /* 836 */ - { 69, 26, 14, 0, 0, 18447, 236, }, /* 837 */ - { 21, 26, 12, 0, 0, 18432, 68, }, 
/* 838 */ - { 23, 7, 12, 0, 0, 18432, 292, }, /* 839 */ - { 24, 7, 12, 0, 0, 18432, 82, }, /* 840 */ - { 24, 6, 12, 0, 0, 18432, 136, }, /* 841 */ - { 24, 26, 12, 0, 0, 28672, 68, }, /* 842 */ - { 111, 7, 12, 0, 0, 18432, 82, }, /* 843 */ - { 111, 6, 12, 0, 0, 18432, 142, }, /* 844 */ - { 111, 21, 12, 0, 0, 18432, 106, }, /* 845 */ - { 111, 21, 12, 0, 0, 18432, 124, }, /* 846 */ - { 99, 7, 12, 0, 0, 18432, 82, }, /* 847 */ - { 99, 6, 12, 0, 0, 18432, 136, }, /* 848 */ - { 99, 21, 12, 0, 0, 28672, 106, }, /* 849 */ - { 99, 21, 12, 0, 0, 28672, 124, }, /* 850 */ - { 99, 13, 12, 0, 0, 18432, 138, }, /* 851 */ - { 2, 9, 12, 108, 1, 18432, 74, }, /* 852 */ - { 2, 5, 12, 108, -35267, 18432, 76, }, /* 853 */ - { 2, 7, 12, 0, 0, 18432, 82, }, /* 854 */ - { 2, 21, 12, 0, 0, 28672, 68, }, /* 855 */ - { 2, 12, 3, 0, 0, 26624, 96, }, /* 856 */ - { 2, 6, 12, 0, 0, 28672, 92, }, /* 857 */ - { 2, 6, 12, 0, 0, 18432, 88, }, /* 858 */ - { 112, 7, 12, 0, 0, 18432, 82, }, /* 859 */ - { 112, 14, 12, 0, 0, 18432, 82, }, /* 860 */ - { 112, 12, 3, 0, 0, 26624, 96, }, /* 861 */ - { 112, 21, 12, 0, 0, 18432, 68, }, /* 862 */ - { 112, 21, 12, 0, 0, 18432, 124, }, /* 863 */ - { 112, 21, 12, 0, 0, 18432, 106, }, /* 864 */ - { 69, 24, 12, 0, 0, 28762, 56, }, /* 865 */ - { 0, 9, 12, 0, -35332, 18432, 74, }, /* 866 */ - { 69, 24, 12, 0, 0, 18432, 56, }, /* 867 */ - { 0, 9, 12, 0, -42280, 18432, 74, }, /* 868 */ - { 0, 5, 12, 0, 48, 18432, 76, }, /* 869 */ - { 0, 9, 12, 0, -42308, 18432, 74, }, /* 870 */ - { 0, 9, 12, 0, -42319, 18432, 74, }, /* 871 */ - { 0, 9, 12, 0, -42315, 18432, 74, }, /* 872 */ - { 0, 9, 12, 0, -42305, 18432, 74, }, /* 873 */ - { 0, 9, 12, 0, -42258, 18432, 74, }, /* 874 */ - { 0, 9, 12, 0, -42282, 18432, 74, }, /* 875 */ - { 0, 9, 12, 0, -42261, 18432, 74, }, /* 876 */ - { 0, 9, 12, 0, 928, 18432, 74, }, /* 877 */ - { 0, 9, 12, 0, -48, 18432, 74, }, /* 878 */ - { 0, 9, 12, 0, -42307, 18432, 74, }, /* 879 */ - { 0, 9, 12, 0, -35384, 18432, 74, }, /* 880 */ - { 36, 7, 12, 0, 
0, 18432, 82, }, /* 881 */ - { 36, 12, 3, 0, 0, 26624, 130, }, /* 882 */ - { 36, 12, 3, 0, 0, 26624, 184, }, /* 883 */ - { 36, 10, 5, 0, 0, 18432, 144, }, /* 884 */ - { 36, 26, 12, 0, 0, 28672, 68, }, /* 885 */ - { 69, 15, 12, 0, 0, 18612, 68, }, /* 886 */ - { 69, 15, 12, 0, 0, 18609, 68, }, /* 887 */ - { 69, 26, 12, 0, 0, 18600, 68, }, /* 888 */ - { 69, 23, 12, 0, 0, 14504, 68, }, /* 889 */ - { 69, 26, 12, 0, 0, 14504, 68, }, /* 890 */ - { 37, 7, 12, 0, 0, 18432, 82, }, /* 891 */ - { 37, 21, 12, 0, 0, 28672, 68, }, /* 892 */ - { 37, 21, 12, 0, 0, 28672, 124, }, /* 893 */ - { 100, 10, 5, 0, 0, 18432, 144, }, /* 894 */ - { 100, 7, 12, 0, 0, 18432, 82, }, /* 895 */ - { 100, 12, 3, 0, 0, 26624, 146, }, /* 896 */ - { 100, 12, 3, 0, 0, 26624, 130, }, /* 897 */ - { 100, 21, 12, 0, 0, 18432, 124, }, /* 898 */ - { 100, 13, 12, 0, 0, 18432, 138, }, /* 899 */ - { 6, 12, 3, 0, 0, 26666, 96, }, /* 900 */ - { 6, 7, 12, 0, 0, 18507, 82, }, /* 901 */ - { 39, 13, 12, 0, 0, 18432, 138, }, /* 902 */ - { 39, 7, 12, 0, 0, 18432, 82, }, /* 903 */ - { 39, 12, 3, 0, 0, 26624, 130, }, /* 904 */ - { 39, 12, 3, 0, 0, 26624, 96, }, /* 905 */ - { 69, 21, 12, 0, 0, 18567, 190, }, /* 906 */ - { 39, 21, 12, 0, 0, 18432, 124, }, /* 907 */ - { 101, 7, 12, 0, 0, 18432, 82, }, /* 908 */ - { 101, 12, 3, 0, 0, 26624, 130, }, /* 909 */ - { 101, 10, 5, 0, 0, 18432, 144, }, /* 910 */ - { 101, 10, 5, 0, 0, 18432, 174, }, /* 911 */ - { 101, 21, 12, 0, 0, 18432, 68, }, /* 912 */ - { 40, 12, 3, 0, 0, 26624, 130, }, /* 913 */ - { 40, 10, 5, 0, 0, 18432, 144, }, /* 914 */ - { 40, 7, 12, 0, 0, 18432, 82, }, /* 915 */ - { 40, 12, 3, 0, 0, 26624, 96, }, /* 916 */ - { 40, 10, 5, 0, 0, 18432, 174, }, /* 917 */ - { 40, 21, 12, 0, 0, 18432, 68, }, /* 918 */ - { 40, 21, 12, 0, 0, 18432, 106, }, /* 919 */ - { 40, 21, 12, 0, 0, 18432, 124, }, /* 920 */ - { 69, 6, 12, 0, 0, 18480, 136, }, /* 921 */ - { 40, 13, 12, 0, 0, 18432, 138, }, /* 922 */ - { 16, 6, 12, 0, 0, 18432, 136, }, /* 923 */ - { 105, 7, 12, 0, 0, 18432, 
82, }, /* 924 */ - { 105, 12, 3, 0, 0, 26624, 130, }, /* 925 */ - { 105, 10, 5, 0, 0, 18432, 144, }, /* 926 */ - { 105, 13, 12, 0, 0, 18432, 138, }, /* 927 */ - { 105, 21, 12, 0, 0, 18432, 68, }, /* 928 */ - { 105, 21, 12, 0, 0, 18432, 124, }, /* 929 */ - { 107, 7, 12, 0, 0, 18432, 82, }, /* 930 */ - { 107, 12, 3, 0, 0, 26624, 130, }, /* 931 */ - { 107, 7, 12, 0, 0, 18432, 156, }, /* 932 */ - { 107, 12, 3, 0, 0, 26624, 96, }, /* 933 */ - { 107, 7, 12, 0, 0, 18432, 294, }, /* 934 */ - { 107, 6, 12, 0, 0, 18432, 136, }, /* 935 */ - { 107, 21, 12, 0, 0, 18432, 68, }, /* 936 */ - { 107, 21, 12, 0, 0, 18432, 106, }, /* 937 */ - { 113, 7, 12, 0, 0, 18432, 82, }, /* 938 */ - { 113, 10, 5, 0, 0, 18432, 144, }, /* 939 */ - { 113, 12, 3, 0, 0, 26624, 130, }, /* 940 */ - { 113, 21, 12, 0, 0, 18432, 124, }, /* 941 */ - { 113, 6, 12, 0, 0, 18432, 136, }, /* 942 */ - { 113, 12, 3, 0, 0, 26624, 146, }, /* 943 */ - { 0, 5, 12, 0, -928, 18432, 76, }, /* 944 */ - { 76, 5, 12, 0, -38864, 18432, 70, }, /* 945 */ - { 113, 10, 5, 0, 0, 18432, 160, }, /* 946 */ - { 113, 13, 12, 0, 0, 18432, 138, }, /* 947 */ - { 18, 7, 9, 0, 0, 18432, 82, }, /* 948 */ - { 18, 7, 10, 0, 0, 18432, 82, }, /* 949 */ - { 68, 4, 12, 0, 0, 18432, 0, }, /* 950 */ - { 68, 3, 12, 0, 0, 18432, 0, }, /* 951 */ - { 23, 7, 12, 0, 0, 18432, 284, }, /* 952 */ - { 71, 25, 12, 0, 0, 12288, 118, }, /* 953 */ - { 3, 7, 12, 0, 0, 0, 296, }, /* 954 */ - { 69, 18, 12, 0, 0, 28705, 54, }, /* 955 */ - { 69, 22, 12, 0, 0, 28705, 54, }, /* 956 */ - { 68, 2, 12, 0, 0, 6144, 298, }, /* 957 */ - { 3, 7, 12, 0, 0, 39, 82, }, /* 958 */ - { 3, 26, 12, 0, 0, 28711, 68, }, /* 959 */ - { 84, 12, 3, 0, 0, 26624, 180, }, /* 960 */ - { 84, 12, 3, 0, 0, 26624, 300, }, /* 961 */ - { 69, 21, 12, 0, 0, 28672, 68, }, /* 962 */ - { 69, 21, 12, 0, 0, 28672, 122, }, /* 963 */ - { 69, 22, 12, 0, 0, 28672, 68, }, /* 964 */ - { 69, 18, 12, 0, 0, 28672, 68, }, /* 965 */ - { 69, 17, 12, 0, 0, 28672, 126, }, /* 966 */ - { 69, 22, 12, 0, 0, 28672, 302, }, 
/* 967 */ - { 69, 18, 12, 0, 0, 28672, 302, }, /* 968 */ - { 69, 21, 12, 0, 0, 8192, 106, }, /* 969 */ - { 69, 21, 12, 0, 0, 8192, 304, }, /* 970 */ - { 69, 21, 12, 0, 0, 8192, 306, }, /* 971 */ - { 69, 21, 12, 0, 0, 28672, 124, }, /* 972 */ - { 69, 22, 12, 0, 0, 28672, 158, }, /* 973 */ - { 69, 18, 12, 0, 0, 28672, 158, }, /* 974 */ - { 69, 21, 12, 0, 0, 14336, 68, }, /* 975 */ - { 69, 21, 12, 0, 0, 28672, 118, }, /* 976 */ - { 69, 17, 12, 0, 0, 12288, 224, }, /* 977 */ - { 69, 25, 12, 0, 0, 28672, 226, }, /* 978 */ - { 69, 21, 12, 0, 0, 28672, 302, }, /* 979 */ - { 69, 21, 12, 0, 0, 28672, 308, }, /* 980 */ - { 69, 17, 12, 0, 0, 12288, 126, }, /* 981 */ - { 69, 21, 12, 0, 0, 8192, 68, }, /* 982 */ - { 69, 13, 12, 0, 0, 10240, 310, }, /* 983 */ - { 0, 9, 12, 0, 32, 18432, 312, }, /* 984 */ - { 69, 24, 12, 0, 0, 28672, 314, }, /* 985 */ - { 0, 5, 12, 0, -32, 18432, 316, }, /* 986 */ - { 69, 21, 12, 0, 0, 28825, 124, }, /* 987 */ - { 69, 22, 12, 0, 0, 28825, 318, }, /* 988 */ - { 69, 18, 12, 0, 0, 28825, 318, }, /* 989 */ - { 69, 21, 12, 0, 0, 28825, 106, }, /* 990 */ - { 69, 6, 3, 0, 0, 18525, 320, }, /* 991 */ - { 69, 1, 2, 0, 0, 28672, 322, }, /* 992 */ - { 31, 7, 12, 0, 0, 18432, 82, }, /* 993 */ - { 69, 21, 12, 0, 0, 18552, 68, }, /* 994 */ - { 69, 21, 12, 0, 0, 28792, 68, }, /* 995 */ - { 69, 21, 12, 0, 0, 18483, 68, }, /* 996 */ - { 69, 15, 12, 0, 0, 18555, 68, }, /* 997 */ - { 69, 26, 12, 0, 0, 18483, 68, }, /* 998 */ - { 1, 14, 12, 0, 0, 28672, 82, }, /* 999 */ - { 1, 15, 12, 0, 0, 28672, 68, }, /* 1000 */ - { 1, 26, 12, 0, 0, 28672, 68, }, /* 1001 */ - { 1, 26, 12, 0, 0, 18432, 68, }, /* 1002 */ - { 102, 7, 12, 0, 0, 18432, 82, }, /* 1003 */ - { 103, 7, 12, 0, 0, 18432, 82, }, /* 1004 */ - { 84, 12, 3, 0, 0, 26651, 96, }, /* 1005 */ - { 69, 15, 12, 0, 0, 10267, 68, }, /* 1006 */ - { 81, 7, 12, 0, 0, 18432, 82, }, /* 1007 */ - { 81, 15, 12, 0, 0, 18432, 68, }, /* 1008 */ - { 82, 7, 12, 0, 0, 18432, 82, }, /* 1009 */ - { 82, 14, 12, 0, 0, 18432, 82, }, /* 
1010 */ - { 53, 7, 12, 0, 0, 18432, 82, }, /* 1011 */ - { 53, 12, 3, 0, 0, 26624, 130, }, /* 1012 */ - { 85, 7, 12, 0, 0, 18432, 82, }, /* 1013 */ - { 85, 21, 12, 0, 0, 18432, 106, }, /* 1014 */ - { 91, 7, 12, 0, 0, 18432, 82, }, /* 1015 */ - { 91, 21, 12, 0, 0, 18432, 106, }, /* 1016 */ - { 91, 14, 12, 0, 0, 18432, 82, }, /* 1017 */ - { 83, 9, 12, 0, 40, 18432, 74, }, /* 1018 */ - { 83, 5, 12, 0, -40, 18432, 76, }, /* 1019 */ - { 86, 7, 12, 0, 0, 18432, 82, }, /* 1020 */ - { 87, 7, 12, 0, 0, 18432, 82, }, /* 1021 */ - { 87, 13, 12, 0, 0, 18432, 138, }, /* 1022 */ - { 145, 9, 12, 0, 40, 18432, 74, }, /* 1023 */ - { 145, 5, 12, 0, -40, 18432, 76, }, /* 1024 */ - { 127, 7, 12, 0, 0, 18432, 82, }, /* 1025 */ - { 125, 7, 12, 0, 0, 18432, 82, }, /* 1026 */ - { 125, 21, 12, 0, 0, 18432, 68, }, /* 1027 */ - { 161, 9, 12, 0, 39, 18432, 74, }, /* 1028 */ - { 161, 5, 12, 0, -39, 18432, 76, }, /* 1029 */ - { 49, 7, 12, 0, 0, 18432, 82, }, /* 1030 */ - { 0, 6, 12, 0, 0, 18432, 94, }, /* 1031 */ - { 32, 7, 12, 0, 0, 34816, 82, }, /* 1032 */ - { 114, 7, 12, 0, 0, 34816, 82, }, /* 1033 */ - { 114, 21, 12, 0, 0, 34816, 106, }, /* 1034 */ - { 114, 15, 12, 0, 0, 34816, 68, }, /* 1035 */ - { 133, 7, 12, 0, 0, 34816, 82, }, /* 1036 */ - { 133, 26, 12, 0, 0, 34816, 68, }, /* 1037 */ - { 133, 15, 12, 0, 0, 34816, 68, }, /* 1038 */ - { 132, 7, 12, 0, 0, 34816, 82, }, /* 1039 */ - { 132, 15, 12, 0, 0, 34816, 68, }, /* 1040 */ - { 139, 7, 12, 0, 0, 34816, 82, }, /* 1041 */ - { 139, 15, 12, 0, 0, 34816, 68, }, /* 1042 */ - { 95, 7, 12, 0, 0, 34816, 82, }, /* 1043 */ - { 95, 15, 12, 0, 0, 34816, 68, }, /* 1044 */ - { 95, 21, 12, 0, 0, 28672, 106, }, /* 1045 */ - { 104, 7, 12, 0, 0, 34816, 82, }, /* 1046 */ - { 104, 21, 12, 0, 0, 34816, 68, }, /* 1047 */ - { 122, 7, 12, 0, 0, 34816, 82, }, /* 1048 */ - { 121, 7, 12, 0, 0, 34816, 82, }, /* 1049 */ - { 121, 15, 12, 0, 0, 34816, 68, }, /* 1050 */ - { 92, 7, 12, 0, 0, 34816, 82, }, /* 1051 */ - { 92, 12, 3, 0, 0, 26624, 130, }, /* 1052 */ - { 92, 
12, 3, 0, 0, 26624, 102, }, /* 1053 */ - { 92, 12, 3, 0, 0, 26624, 184, }, /* 1054 */ - { 92, 15, 12, 0, 0, 34816, 68, }, /* 1055 */ - { 92, 21, 12, 0, 0, 34816, 68, }, /* 1056 */ - { 92, 21, 12, 0, 0, 34816, 124, }, /* 1057 */ - { 115, 7, 12, 0, 0, 34816, 82, }, /* 1058 */ - { 115, 15, 12, 0, 0, 34816, 68, }, /* 1059 */ - { 115, 21, 12, 0, 0, 34816, 68, }, /* 1060 */ - { 131, 7, 12, 0, 0, 34816, 82, }, /* 1061 */ - { 131, 15, 12, 0, 0, 34816, 68, }, /* 1062 */ - { 51, 7, 12, 0, 0, 34816, 82, }, /* 1063 */ - { 51, 26, 12, 0, 0, 34816, 68, }, /* 1064 */ - { 51, 12, 3, 0, 0, 26624, 96, }, /* 1065 */ - { 51, 15, 12, 0, 0, 34816, 68, }, /* 1066 */ - { 51, 21, 12, 0, 0, 34816, 106, }, /* 1067 */ - { 51, 21, 12, 0, 0, 34918, 106, }, /* 1068 */ - { 51, 21, 12, 0, 0, 34816, 68, }, /* 1069 */ - { 108, 7, 12, 0, 0, 34816, 82, }, /* 1070 */ - { 108, 21, 12, 0, 0, 28672, 68, }, /* 1071 */ - { 108, 21, 12, 0, 0, 28672, 106, }, /* 1072 */ - { 116, 7, 12, 0, 0, 34816, 82, }, /* 1073 */ - { 116, 15, 12, 0, 0, 34816, 68, }, /* 1074 */ - { 117, 7, 12, 0, 0, 34816, 82, }, /* 1075 */ - { 117, 15, 12, 0, 0, 34816, 68, }, /* 1076 */ - { 54, 7, 12, 0, 0, 34816, 82, }, /* 1077 */ - { 54, 21, 12, 0, 0, 34816, 106, }, /* 1078 */ - { 54, 15, 12, 0, 0, 34816, 68, }, /* 1079 */ - { 118, 7, 12, 0, 0, 34816, 82, }, /* 1080 */ - { 140, 9, 12, 0, 64, 34816, 74, }, /* 1081 */ - { 140, 5, 12, 0, -64, 34816, 76, }, /* 1082 */ - { 140, 15, 12, 0, 0, 34816, 68, }, /* 1083 */ - { 62, 7, 12, 0, 0, 0, 82, }, /* 1084 */ - { 62, 7, 12, 0, 0, 0, 294, }, /* 1085 */ - { 62, 12, 3, 0, 0, 26624, 128, }, /* 1086 */ - { 62, 13, 12, 0, 0, 2048, 138, }, /* 1087 */ - { 3, 15, 12, 0, 0, 2048, 68, }, /* 1088 */ - { 65, 7, 12, 0, 0, 34816, 82, }, /* 1089 */ - { 65, 12, 3, 0, 0, 26624, 130, }, /* 1090 */ - { 65, 17, 12, 0, 0, 34816, 126, }, /* 1091 */ - { 152, 7, 12, 0, 0, 34816, 82, }, /* 1092 */ - { 152, 15, 12, 0, 0, 34816, 68, }, /* 1093 */ - { 63, 7, 12, 0, 0, 0, 82, }, /* 1094 */ - { 63, 12, 3, 0, 0, 26624, 96, }, 
/* 1095 */ - { 63, 15, 12, 0, 0, 0, 68, }, /* 1096 */ - { 63, 21, 12, 0, 0, 0, 124, }, /* 1097 */ - { 67, 7, 12, 0, 0, 34816, 82, }, /* 1098 */ - { 67, 12, 3, 0, 0, 26624, 96, }, /* 1099 */ - { 67, 21, 12, 0, 0, 34816, 124, }, /* 1100 */ - { 156, 7, 12, 0, 0, 34816, 82, }, /* 1101 */ - { 156, 15, 12, 0, 0, 34816, 68, }, /* 1102 */ - { 153, 7, 12, 0, 0, 34816, 82, }, /* 1103 */ - { 120, 10, 5, 0, 0, 18432, 144, }, /* 1104 */ - { 120, 12, 3, 0, 0, 26624, 130, }, /* 1105 */ - { 120, 7, 12, 0, 0, 18432, 82, }, /* 1106 */ - { 120, 12, 3, 0, 0, 26624, 146, }, /* 1107 */ - { 120, 21, 12, 0, 0, 18432, 124, }, /* 1108 */ - { 120, 21, 12, 0, 0, 18432, 106, }, /* 1109 */ - { 120, 15, 12, 0, 0, 28672, 68, }, /* 1110 */ - { 120, 13, 12, 0, 0, 18432, 138, }, /* 1111 */ - { 120, 12, 3, 0, 0, 26624, 184, }, /* 1112 */ - { 41, 12, 3, 0, 0, 26624, 130, }, /* 1113 */ - { 41, 10, 5, 0, 0, 18432, 144, }, /* 1114 */ - { 41, 7, 12, 0, 0, 18432, 82, }, /* 1115 */ - { 41, 12, 3, 0, 0, 26624, 146, }, /* 1116 */ - { 41, 12, 3, 0, 0, 26624, 96, }, /* 1117 */ - { 41, 21, 12, 0, 0, 18432, 68, }, /* 1118 */ - { 41, 1, 4, 0, 0, 18432, 132, }, /* 1119 */ - { 41, 21, 12, 0, 0, 18432, 124, }, /* 1120 */ - { 124, 7, 12, 0, 0, 18432, 82, }, /* 1121 */ - { 124, 13, 12, 0, 0, 18432, 138, }, /* 1122 */ - { 43, 12, 3, 0, 0, 26624, 130, }, /* 1123 */ - { 43, 7, 12, 0, 0, 18432, 82, }, /* 1124 */ - { 43, 10, 5, 0, 0, 18432, 144, }, /* 1125 */ - { 43, 12, 3, 0, 0, 26624, 146, }, /* 1126 */ - { 43, 13, 12, 0, 0, 18432, 138, }, /* 1127 */ - { 43, 21, 12, 0, 0, 18432, 68, }, /* 1128 */ - { 43, 21, 12, 0, 0, 18432, 124, }, /* 1129 */ - { 50, 7, 12, 0, 0, 18432, 82, }, /* 1130 */ - { 50, 12, 3, 0, 0, 26624, 96, }, /* 1131 */ - { 50, 21, 12, 0, 0, 18432, 68, }, /* 1132 */ - { 44, 12, 3, 0, 0, 26624, 130, }, /* 1133 */ - { 44, 10, 5, 0, 0, 18432, 144, }, /* 1134 */ - { 44, 7, 12, 0, 0, 18432, 82, }, /* 1135 */ - { 44, 10, 5, 0, 0, 18432, 174, }, /* 1136 */ - { 44, 7, 4, 0, 0, 18432, 82, }, /* 1137 */ - { 44, 21, 
12, 0, 0, 18432, 124, }, /* 1138 */ - { 44, 21, 12, 0, 0, 18432, 68, }, /* 1139 */ - { 44, 12, 3, 0, 0, 26624, 102, }, /* 1140 */ - { 44, 12, 3, 0, 0, 26624, 96, }, /* 1141 */ - { 44, 13, 12, 0, 0, 18432, 138, }, /* 1142 */ - { 15, 15, 12, 0, 0, 18432, 68, }, /* 1143 */ - { 48, 7, 12, 0, 0, 18432, 82, }, /* 1144 */ - { 48, 10, 5, 0, 0, 18432, 144, }, /* 1145 */ - { 48, 12, 3, 0, 0, 26624, 130, }, /* 1146 */ - { 48, 10, 5, 0, 0, 18432, 174, }, /* 1147 */ - { 48, 12, 3, 0, 0, 26624, 96, }, /* 1148 */ - { 48, 21, 12, 0, 0, 18432, 124, }, /* 1149 */ - { 48, 21, 12, 0, 0, 18432, 106, }, /* 1150 */ - { 48, 21, 12, 0, 0, 18432, 68, }, /* 1151 */ - { 57, 7, 12, 0, 0, 18432, 82, }, /* 1152 */ - { 57, 21, 12, 0, 0, 18432, 124, }, /* 1153 */ - { 55, 7, 12, 0, 0, 18432, 82, }, /* 1154 */ - { 55, 12, 3, 0, 0, 26624, 130, }, /* 1155 */ - { 55, 10, 5, 0, 0, 18432, 144, }, /* 1156 */ - { 55, 12, 3, 0, 0, 26624, 96, }, /* 1157 */ - { 55, 12, 3, 0, 0, 26624, 146, }, /* 1158 */ - { 55, 13, 12, 0, 0, 18432, 138, }, /* 1159 */ - { 47, 12, 3, 0, 0, 26624, 130, }, /* 1160 */ - { 47, 12, 3, 0, 0, 26705, 130, }, /* 1161 */ - { 47, 10, 5, 0, 0, 18432, 144, }, /* 1162 */ - { 47, 10, 5, 0, 0, 18513, 144, }, /* 1163 */ - { 47, 7, 12, 0, 0, 18432, 82, }, /* 1164 */ - { 84, 12, 3, 0, 0, 26705, 102, }, /* 1165 */ - { 47, 12, 3, 0, 0, 26705, 96, }, /* 1166 */ - { 47, 10, 3, 0, 0, 18432, 148, }, /* 1167 */ - { 47, 10, 5, 0, 0, 18432, 174, }, /* 1168 */ - { 47, 7, 12, 0, 0, 18432, 324, }, /* 1169 */ - { 47, 12, 3, 0, 0, 26624, 96, }, /* 1170 */ - { 144, 7, 12, 0, 0, 18432, 82, }, /* 1171 */ - { 144, 10, 5, 0, 0, 18432, 144, }, /* 1172 */ - { 144, 12, 3, 0, 0, 26624, 130, }, /* 1173 */ - { 144, 12, 3, 0, 0, 26624, 146, }, /* 1174 */ - { 144, 12, 3, 0, 0, 26624, 96, }, /* 1175 */ - { 144, 21, 12, 0, 0, 18432, 124, }, /* 1176 */ - { 144, 21, 12, 0, 0, 18432, 106, }, /* 1177 */ - { 144, 21, 12, 0, 0, 18432, 68, }, /* 1178 */ - { 144, 13, 12, 0, 0, 18432, 138, }, /* 1179 */ - { 144, 12, 3, 0, 0, 26624, 
102, }, /* 1180 */ - { 56, 7, 12, 0, 0, 18432, 82, }, /* 1181 */ - { 56, 10, 3, 0, 0, 18432, 148, }, /* 1182 */ - { 56, 10, 5, 0, 0, 18432, 144, }, /* 1183 */ - { 56, 12, 3, 0, 0, 26624, 130, }, /* 1184 */ - { 56, 12, 3, 0, 0, 26624, 146, }, /* 1185 */ - { 56, 12, 3, 0, 0, 26624, 96, }, /* 1186 */ - { 56, 21, 12, 0, 0, 18432, 68, }, /* 1187 */ - { 56, 13, 12, 0, 0, 18432, 138, }, /* 1188 */ - { 135, 7, 12, 0, 0, 18432, 82, }, /* 1189 */ - { 135, 10, 3, 0, 0, 18432, 148, }, /* 1190 */ - { 135, 10, 5, 0, 0, 18432, 144, }, /* 1191 */ - { 135, 12, 3, 0, 0, 26624, 130, }, /* 1192 */ - { 135, 12, 3, 0, 0, 26624, 146, }, /* 1193 */ - { 135, 12, 3, 0, 0, 26624, 96, }, /* 1194 */ - { 135, 21, 12, 0, 0, 18432, 68, }, /* 1195 */ - { 135, 21, 12, 0, 0, 18432, 124, }, /* 1196 */ - { 135, 21, 12, 0, 0, 18432, 106, }, /* 1197 */ - { 135, 21, 12, 0, 0, 18432, 178, }, /* 1198 */ - { 52, 7, 12, 0, 0, 18432, 82, }, /* 1199 */ - { 52, 10, 5, 0, 0, 18432, 144, }, /* 1200 */ - { 52, 12, 3, 0, 0, 26624, 130, }, /* 1201 */ - { 52, 12, 3, 0, 0, 26624, 146, }, /* 1202 */ - { 52, 21, 12, 0, 0, 18432, 124, }, /* 1203 */ - { 52, 21, 12, 0, 0, 18432, 68, }, /* 1204 */ - { 52, 13, 12, 0, 0, 18432, 138, }, /* 1205 */ - { 45, 7, 12, 0, 0, 18432, 82, }, /* 1206 */ - { 45, 12, 3, 0, 0, 26624, 130, }, /* 1207 */ - { 45, 10, 5, 0, 0, 18432, 144, }, /* 1208 */ - { 45, 10, 5, 0, 0, 18432, 174, }, /* 1209 */ - { 45, 12, 3, 0, 0, 26624, 96, }, /* 1210 */ - { 45, 21, 12, 0, 0, 18432, 68, }, /* 1211 */ - { 45, 13, 12, 0, 0, 18432, 138, }, /* 1212 */ - { 137, 7, 12, 0, 0, 18432, 82, }, /* 1213 */ - { 137, 12, 3, 0, 0, 26624, 130, }, /* 1214 */ - { 137, 10, 12, 0, 0, 18432, 144, }, /* 1215 */ - { 137, 10, 5, 0, 0, 18432, 144, }, /* 1216 */ - { 137, 12, 3, 0, 0, 26624, 146, }, /* 1217 */ - { 137, 13, 12, 0, 0, 18432, 138, }, /* 1218 */ - { 137, 15, 12, 0, 0, 18432, 68, }, /* 1219 */ - { 137, 21, 12, 0, 0, 18432, 124, }, /* 1220 */ - { 137, 26, 12, 0, 0, 18432, 68, }, /* 1221 */ - { 60, 7, 12, 0, 0, 18432, 82, 
}, /* 1222 */ - { 60, 10, 5, 0, 0, 18432, 144, }, /* 1223 */ - { 60, 12, 3, 0, 0, 26624, 130, }, /* 1224 */ - { 60, 12, 3, 0, 0, 26624, 146, }, /* 1225 */ - { 60, 12, 3, 0, 0, 26624, 96, }, /* 1226 */ - { 60, 21, 12, 0, 0, 18432, 68, }, /* 1227 */ - { 136, 9, 12, 0, 32, 18432, 74, }, /* 1228 */ - { 136, 5, 12, 0, -32, 18432, 76, }, /* 1229 */ - { 136, 13, 12, 0, 0, 18432, 138, }, /* 1230 */ - { 136, 15, 12, 0, 0, 18432, 68, }, /* 1231 */ - { 136, 7, 12, 0, 0, 18432, 82, }, /* 1232 */ - { 157, 7, 12, 0, 0, 18432, 82, }, /* 1233 */ - { 157, 10, 3, 0, 0, 18432, 148, }, /* 1234 */ - { 157, 10, 5, 0, 0, 18432, 144, }, /* 1235 */ - { 157, 12, 3, 0, 0, 26624, 130, }, /* 1236 */ - { 157, 10, 5, 0, 0, 18432, 174, }, /* 1237 */ - { 157, 12, 3, 0, 0, 26624, 146, }, /* 1238 */ - { 157, 7, 4, 0, 0, 18432, 82, }, /* 1239 */ - { 157, 12, 3, 0, 0, 26624, 96, }, /* 1240 */ - { 157, 21, 12, 0, 0, 18432, 124, }, /* 1241 */ - { 157, 21, 12, 0, 0, 18432, 68, }, /* 1242 */ - { 157, 13, 12, 0, 0, 18432, 138, }, /* 1243 */ - { 64, 7, 12, 0, 0, 18432, 82, }, /* 1244 */ - { 64, 10, 5, 0, 0, 18432, 144, }, /* 1245 */ - { 64, 12, 3, 0, 0, 26624, 130, }, /* 1246 */ - { 64, 12, 3, 0, 0, 26624, 146, }, /* 1247 */ - { 64, 21, 12, 0, 0, 18432, 68, }, /* 1248 */ - { 149, 7, 12, 0, 0, 18432, 82, }, /* 1249 */ - { 149, 12, 3, 0, 0, 26624, 130, }, /* 1250 */ - { 149, 12, 3, 0, 0, 18432, 130, }, /* 1251 */ - { 149, 12, 3, 0, 0, 26624, 102, }, /* 1252 */ - { 149, 12, 3, 0, 0, 26624, 146, }, /* 1253 */ - { 149, 10, 5, 0, 0, 18432, 144, }, /* 1254 */ - { 149, 7, 4, 0, 0, 18432, 82, }, /* 1255 */ - { 149, 21, 12, 0, 0, 18432, 68, }, /* 1256 */ - { 149, 21, 12, 0, 0, 18432, 124, }, /* 1257 */ - { 148, 7, 12, 0, 0, 18432, 82, }, /* 1258 */ - { 148, 12, 3, 0, 0, 26624, 130, }, /* 1259 */ - { 148, 10, 5, 0, 0, 18432, 144, }, /* 1260 */ - { 148, 7, 4, 0, 0, 18432, 82, }, /* 1261 */ - { 148, 12, 3, 0, 0, 26624, 326, }, /* 1262 */ - { 148, 12, 3, 0, 0, 26624, 146, }, /* 1263 */ - { 148, 21, 12, 0, 0, 18432, 68, 
}, /* 1264 */ - { 148, 21, 12, 0, 0, 18432, 124, }, /* 1265 */ - { 148, 21, 12, 0, 0, 18432, 106, }, /* 1266 */ - { 134, 7, 12, 0, 0, 18432, 82, }, /* 1267 */ - { 142, 7, 12, 0, 0, 18432, 82, }, /* 1268 */ - { 142, 10, 5, 0, 0, 18432, 144, }, /* 1269 */ - { 142, 12, 3, 0, 0, 26624, 130, }, /* 1270 */ - { 142, 12, 3, 0, 0, 18432, 146, }, /* 1271 */ - { 142, 21, 12, 0, 0, 18432, 124, }, /* 1272 */ - { 142, 21, 12, 0, 0, 18432, 106, }, /* 1273 */ - { 142, 21, 12, 0, 0, 18432, 68, }, /* 1274 */ - { 142, 13, 12, 0, 0, 18432, 138, }, /* 1275 */ - { 142, 15, 12, 0, 0, 18432, 68, }, /* 1276 */ - { 143, 21, 12, 0, 0, 18432, 68, }, /* 1277 */ - { 143, 21, 12, 0, 0, 18432, 106, }, /* 1278 */ - { 143, 7, 12, 0, 0, 18432, 82, }, /* 1279 */ - { 143, 12, 3, 0, 0, 26624, 130, }, /* 1280 */ - { 143, 10, 5, 0, 0, 18432, 144, }, /* 1281 */ - { 59, 7, 12, 0, 0, 18432, 82, }, /* 1282 */ - { 59, 12, 3, 0, 0, 26624, 130, }, /* 1283 */ - { 59, 12, 3, 0, 0, 26624, 96, }, /* 1284 */ - { 59, 12, 3, 0, 0, 26624, 146, }, /* 1285 */ - { 59, 7, 4, 0, 0, 18432, 82, }, /* 1286 */ - { 59, 13, 12, 0, 0, 18432, 138, }, /* 1287 */ - { 61, 7, 12, 0, 0, 18432, 82, }, /* 1288 */ - { 61, 10, 5, 0, 0, 18432, 144, }, /* 1289 */ - { 61, 12, 3, 0, 0, 26624, 130, }, /* 1290 */ - { 61, 12, 3, 0, 0, 26624, 146, }, /* 1291 */ - { 61, 13, 12, 0, 0, 18432, 138, }, /* 1292 */ - { 150, 7, 12, 0, 0, 18432, 82, }, /* 1293 */ - { 150, 12, 3, 0, 0, 26624, 130, }, /* 1294 */ - { 150, 10, 5, 0, 0, 18432, 144, }, /* 1295 */ - { 150, 21, 12, 0, 0, 18432, 124, }, /* 1296 */ - { 162, 12, 3, 0, 0, 26624, 130, }, /* 1297 */ - { 162, 7, 4, 0, 0, 18432, 82, }, /* 1298 */ - { 162, 10, 5, 0, 0, 18432, 144, }, /* 1299 */ - { 162, 7, 12, 0, 0, 18432, 82, }, /* 1300 */ - { 162, 10, 5, 0, 0, 18432, 176, }, /* 1301 */ - { 162, 12, 3, 0, 0, 26624, 184, }, /* 1302 */ - { 162, 21, 12, 0, 0, 18432, 124, }, /* 1303 */ - { 162, 21, 12, 0, 0, 18432, 68, }, /* 1304 */ - { 162, 13, 12, 0, 0, 18432, 138, }, /* 1305 */ - { 11, 15, 12, 0, 0, 18432, 
68, }, /* 1306 */ - { 11, 21, 12, 0, 0, 18432, 68, }, /* 1307 */ - { 94, 7, 12, 0, 0, 18432, 82, }, /* 1308 */ - { 94, 14, 12, 0, 0, 18432, 82, }, /* 1309 */ - { 94, 21, 12, 0, 0, 18432, 106, }, /* 1310 */ - { 66, 7, 12, 0, 0, 18432, 82, }, /* 1311 */ - { 66, 21, 12, 0, 0, 18432, 68, }, /* 1312 */ - { 109, 7, 12, 0, 0, 18432, 82, }, /* 1313 */ - { 109, 1, 2, 0, 0, 18432, 322, }, /* 1314 */ - { 109, 12, 3, 0, 0, 26624, 102, }, /* 1315 */ - { 109, 12, 3, 0, 0, 26624, 96, }, /* 1316 */ - { 138, 7, 12, 0, 0, 18432, 82, }, /* 1317 */ - { 130, 7, 12, 0, 0, 18432, 82, }, /* 1318 */ - { 130, 13, 12, 0, 0, 18432, 138, }, /* 1319 */ - { 130, 21, 12, 0, 0, 18432, 124, }, /* 1320 */ - { 159, 7, 12, 0, 0, 18432, 82, }, /* 1321 */ - { 159, 13, 12, 0, 0, 18432, 138, }, /* 1322 */ - { 126, 7, 12, 0, 0, 18432, 82, }, /* 1323 */ - { 126, 12, 3, 0, 0, 26624, 96, }, /* 1324 */ - { 126, 21, 12, 0, 0, 18432, 124, }, /* 1325 */ - { 128, 7, 12, 0, 0, 18432, 82, }, /* 1326 */ - { 128, 12, 3, 0, 0, 26624, 96, }, /* 1327 */ - { 128, 21, 12, 0, 0, 18432, 124, }, /* 1328 */ - { 128, 21, 12, 0, 0, 18432, 106, }, /* 1329 */ - { 128, 21, 12, 0, 0, 18432, 68, }, /* 1330 */ - { 128, 26, 12, 0, 0, 18432, 68, }, /* 1331 */ - { 128, 6, 12, 0, 0, 18432, 142, }, /* 1332 */ - { 128, 6, 12, 0, 0, 18432, 136, }, /* 1333 */ - { 128, 13, 12, 0, 0, 18432, 138, }, /* 1334 */ - { 128, 15, 12, 0, 0, 18432, 68, }, /* 1335 */ - { 151, 9, 12, 0, 32, 18432, 74, }, /* 1336 */ - { 151, 5, 12, 0, -32, 18432, 76, }, /* 1337 */ - { 151, 15, 12, 0, 0, 18432, 68, }, /* 1338 */ - { 151, 21, 12, 0, 0, 18432, 106, }, /* 1339 */ - { 151, 21, 12, 0, 0, 18432, 124, }, /* 1340 */ - { 151, 21, 12, 0, 0, 18432, 68, }, /* 1341 */ - { 123, 7, 12, 0, 0, 18432, 82, }, /* 1342 */ - { 123, 12, 3, 0, 0, 26624, 130, }, /* 1343 */ - { 123, 10, 5, 0, 0, 18432, 144, }, /* 1344 */ - { 123, 12, 3, 0, 0, 26624, 128, }, /* 1345 */ - { 123, 6, 12, 0, 0, 18432, 92, }, /* 1346 */ - { 146, 6, 12, 0, 0, 18432, 136, }, /* 1347 */ - { 147, 6, 12, 0, 0, 
18432, 136, }, /* 1348 */ - { 23, 21, 12, 0, 0, 28672, 68, }, /* 1349 */ - { 158, 12, 3, 0, 0, 26624, 328, }, /* 1350 */ - { 23, 10, 5, 0, 0, 18432, 164, }, /* 1351 */ - { 146, 7, 12, 0, 0, 18432, 284, }, /* 1352 */ - { 158, 7, 12, 0, 0, 18432, 284, }, /* 1353 */ - { 21, 6, 12, 0, 0, 18432, 92, }, /* 1354 */ - { 147, 7, 12, 0, 0, 18432, 284, }, /* 1355 */ - { 46, 7, 12, 0, 0, 18432, 82, }, /* 1356 */ - { 46, 26, 12, 0, 0, 18432, 68, }, /* 1357 */ - { 46, 12, 3, 0, 0, 26624, 102, }, /* 1358 */ - { 46, 12, 3, 0, 0, 26624, 130, }, /* 1359 */ - { 46, 21, 12, 0, 0, 18432, 124, }, /* 1360 */ - { 69, 1, 2, 0, 0, 6153, 66, }, /* 1361 */ - { 69, 10, 3, 0, 0, 18432, 330, }, /* 1362 */ - { 69, 10, 5, 0, 0, 18432, 138, }, /* 1363 */ - { 69, 10, 5, 0, 0, 18432, 160, }, /* 1364 */ - { 69, 10, 3, 0, 0, 18432, 286, }, /* 1365 */ - { 1, 12, 3, 0, 0, 26624, 102, }, /* 1366 */ - { 69, 25, 12, 0, 0, 18432, 118, }, /* 1367 */ - { 69, 13, 12, 0, 0, 10240, 214, }, /* 1368 */ - { 141, 26, 12, 0, 0, 18432, 68, }, /* 1369 */ - { 141, 12, 3, 0, 0, 26624, 102, }, /* 1370 */ - { 141, 21, 12, 0, 0, 18432, 106, }, /* 1371 */ - { 141, 21, 12, 0, 0, 18432, 124, }, /* 1372 */ - { 141, 21, 12, 0, 0, 18432, 68, }, /* 1373 */ - { 35, 12, 3, 0, 0, 26624, 130, }, /* 1374 */ - { 2, 6, 12, 0, 0, 18432, 90, }, /* 1375 */ - { 154, 7, 12, 0, 0, 18432, 82, }, /* 1376 */ - { 154, 12, 3, 0, 0, 26624, 96, }, /* 1377 */ - { 154, 6, 12, 0, 0, 18432, 142, }, /* 1378 */ - { 154, 6, 12, 0, 0, 18432, 136, }, /* 1379 */ - { 154, 13, 12, 0, 0, 18432, 138, }, /* 1380 */ - { 154, 26, 12, 0, 0, 18432, 68, }, /* 1381 */ - { 160, 7, 12, 0, 0, 18432, 82, }, /* 1382 */ - { 160, 12, 3, 0, 0, 26624, 96, }, /* 1383 */ - { 155, 7, 12, 0, 0, 18432, 82, }, /* 1384 */ - { 155, 12, 3, 0, 0, 26624, 96, }, /* 1385 */ - { 155, 13, 12, 0, 0, 18432, 138, }, /* 1386 */ - { 155, 23, 12, 0, 0, 14336, 68, }, /* 1387 */ - { 163, 7, 12, 0, 0, 18432, 82, }, /* 1388 */ - { 163, 6, 12, 0, 0, 18432, 142, }, /* 1389 */ - { 163, 12, 3, 0, 0, 26624, 
102, }, /* 1390 */ - { 163, 13, 12, 0, 0, 18432, 138, }, /* 1391 */ - { 129, 7, 12, 0, 0, 34816, 82, }, /* 1392 */ - { 129, 15, 12, 0, 0, 34816, 68, }, /* 1393 */ - { 129, 12, 3, 0, 0, 26624, 96, }, /* 1394 */ - { 58, 9, 12, 0, 34, 34816, 74, }, /* 1395 */ - { 58, 5, 12, 0, -34, 34816, 76, }, /* 1396 */ - { 58, 12, 3, 0, 0, 26624, 150, }, /* 1397 */ - { 58, 12, 3, 0, 0, 26624, 130, }, /* 1398 */ - { 58, 12, 3, 0, 0, 26624, 96, }, /* 1399 */ - { 58, 6, 12, 0, 0, 34816, 142, }, /* 1400 */ - { 58, 13, 12, 0, 0, 34816, 138, }, /* 1401 */ - { 58, 21, 12, 0, 0, 34816, 68, }, /* 1402 */ - { 69, 15, 12, 0, 0, 0, 68, }, /* 1403 */ - { 69, 26, 12, 0, 0, 0, 68, }, /* 1404 */ - { 69, 23, 12, 0, 0, 0, 68, }, /* 1405 */ - { 3, 7, 12, 0, 0, 0, 240, }, /* 1406 */ - { 69, 26, 14, 0, 0, 28672, 332, }, /* 1407 */ - { 69, 26, 14, 0, 0, 28672, 334, }, /* 1408 */ - { 68, 2, 14, 0, 0, 18432, 336, }, /* 1409 */ - { 69, 26, 12, 0, 0, 18432, 338, }, /* 1410 */ - { 69, 26, 14, 0, 0, 18432, 340, }, /* 1411 */ - { 69, 26, 14, 0, 0, 18432, 334, }, /* 1412 */ - { 69, 26, 11, 0, 0, 18432, 342, }, /* 1413 */ - { 20, 26, 12, 0, 0, 18432, 68, }, /* 1414 */ - { 69, 26, 14, 0, 0, 18432, 236, }, /* 1415 */ - { 69, 26, 14, 0, 0, 18447, 334, }, /* 1416 */ - { 69, 26, 14, 0, 0, 28672, 344, }, /* 1417 */ - { 69, 26, 14, 0, 0, 28672, 346, }, /* 1418 */ - { 69, 24, 3, 0, 0, 28672, 348, }, /* 1419 */ - { 69, 26, 14, 0, 0, 28672, 350, }, /* 1420 */ - { 69, 13, 12, 0, 0, 10240, 138, }, /* 1421 */ - { 69, 1, 3, 0, 0, 6144, 352, }, /* 1422 */ + { 0, 5, 12, 0, 195, 18432, 78, }, /* 74 */ + { 0, 9, 12, 0, 210, 18432, 76, }, /* 75 */ + { 0, 9, 12, 0, 206, 18432, 76, }, /* 76 */ + { 0, 9, 12, 0, 205, 18432, 76, }, /* 77 */ + { 0, 9, 12, 0, 79, 18432, 76, }, /* 78 */ + { 0, 9, 12, 0, 202, 18432, 76, }, /* 79 */ + { 0, 9, 12, 0, 203, 18432, 76, }, /* 80 */ + { 0, 9, 12, 0, 207, 18432, 76, }, /* 81 */ + { 0, 5, 12, 0, 97, 18432, 78, }, /* 82 */ + { 0, 9, 12, 0, 211, 18432, 76, }, /* 83 */ + { 0, 9, 12, 0, 209, 18432, 
76, }, /* 84 */ + { 0, 5, 12, 0, 163, 18432, 78, }, /* 85 */ + { 0, 5, 12, 0, 42561, 18432, 78, }, /* 86 */ + { 0, 9, 12, 0, 213, 18432, 76, }, /* 87 */ + { 0, 5, 12, 0, 130, 18432, 78, }, /* 88 */ + { 0, 9, 12, 0, 214, 18432, 76, }, /* 89 */ + { 0, 9, 12, 0, 218, 18432, 76, }, /* 90 */ + { 0, 9, 12, 0, 217, 18432, 76, }, /* 91 */ + { 0, 9, 12, 0, 219, 18432, 76, }, /* 92 */ + { 0, 7, 12, 0, 0, 18432, 84, }, /* 93 */ + { 0, 5, 12, 0, 56, 18432, 78, }, /* 94 */ + { 0, 9, 12, 5, 2, 18432, 86, }, /* 95 */ + { 0, 8, 12, 5, 1, 18432, 88, }, /* 96 */ + { 0, 5, 12, 5, -2, 18432, 78, }, /* 97 */ + { 0, 9, 12, 9, 2, 18432, 86, }, /* 98 */ + { 0, 8, 12, 9, 1, 18432, 88, }, /* 99 */ + { 0, 5, 12, 9, -2, 18432, 78, }, /* 100 */ + { 0, 9, 12, 13, 2, 18432, 86, }, /* 101 */ + { 0, 8, 12, 13, 1, 18432, 88, }, /* 102 */ + { 0, 5, 12, 13, -2, 18432, 78, }, /* 103 */ + { 0, 5, 12, 0, -79, 18432, 78, }, /* 104 */ + { 0, 9, 12, 17, 2, 18432, 86, }, /* 105 */ + { 0, 8, 12, 17, 1, 18432, 88, }, /* 106 */ + { 0, 5, 12, 17, -2, 18432, 78, }, /* 107 */ + { 0, 9, 12, 0, -97, 18432, 76, }, /* 108 */ + { 0, 9, 12, 0, -56, 18432, 76, }, /* 109 */ + { 0, 9, 12, 0, -130, 18432, 76, }, /* 110 */ + { 0, 9, 12, 0, 10795, 18432, 76, }, /* 111 */ + { 0, 9, 12, 0, -163, 18432, 76, }, /* 112 */ + { 0, 9, 12, 0, 10792, 18432, 76, }, /* 113 */ + { 0, 5, 12, 0, 10815, 18432, 78, }, /* 114 */ + { 0, 9, 12, 0, -195, 18432, 76, }, /* 115 */ + { 0, 9, 12, 0, 69, 18432, 76, }, /* 116 */ + { 0, 9, 12, 0, 71, 18432, 76, }, /* 117 */ + { 0, 5, 12, 0, 10783, 18432, 78, }, /* 118 */ + { 0, 5, 12, 0, 10780, 18432, 78, }, /* 119 */ + { 0, 5, 12, 0, 10782, 18432, 78, }, /* 120 */ + { 0, 5, 12, 0, -210, 18432, 78, }, /* 121 */ + { 0, 5, 12, 0, -206, 18432, 78, }, /* 122 */ + { 0, 5, 12, 0, -205, 18432, 78, }, /* 123 */ + { 0, 5, 12, 0, -202, 18432, 78, }, /* 124 */ + { 0, 5, 12, 0, -203, 18432, 78, }, /* 125 */ + { 0, 5, 12, 0, 42319, 18432, 78, }, /* 126 */ + { 0, 5, 12, 0, 42315, 18432, 78, }, /* 127 */ + { 0, 5, 12, 
0, -207, 18432, 78, }, /* 128 */ + { 0, 5, 12, 0, 42343, 18432, 78, }, /* 129 */ + { 0, 5, 12, 0, 42280, 18432, 78, }, /* 130 */ + { 0, 5, 12, 0, 42308, 18432, 78, }, /* 131 */ + { 0, 5, 12, 0, -209, 18432, 80, }, /* 132 */ + { 0, 5, 12, 0, -211, 18432, 78, }, /* 133 */ + { 0, 5, 12, 0, 10743, 18432, 78, }, /* 134 */ + { 0, 5, 12, 0, 42305, 18432, 78, }, /* 135 */ + { 0, 5, 12, 0, 10749, 18432, 78, }, /* 136 */ + { 0, 5, 12, 0, -213, 18432, 78, }, /* 137 */ + { 0, 5, 12, 0, -214, 18432, 78, }, /* 138 */ + { 0, 5, 12, 0, 10727, 18432, 78, }, /* 139 */ + { 0, 5, 12, 0, -218, 18432, 78, }, /* 140 */ + { 0, 5, 12, 0, 42307, 18432, 78, }, /* 141 */ + { 0, 5, 12, 0, 42282, 18432, 78, }, /* 142 */ + { 0, 5, 12, 0, -69, 18432, 78, }, /* 143 */ + { 0, 5, 12, 0, -217, 18432, 78, }, /* 144 */ + { 0, 5, 12, 0, -71, 18432, 78, }, /* 145 */ + { 0, 5, 12, 0, -219, 18432, 78, }, /* 146 */ + { 0, 5, 12, 0, 42261, 18432, 80, }, /* 147 */ + { 0, 5, 12, 0, 42258, 18432, 78, }, /* 148 */ + { 0, 6, 12, 0, 0, 18432, 90, }, /* 149 */ + { 0, 6, 12, 0, 0, 18432, 92, }, /* 150 */ + { 99, 6, 12, 0, 0, 28672, 94, }, /* 151 */ + { 99, 6, 12, 0, 0, 18432, 94, }, /* 152 */ + { 99, 6, 12, 0, 0, 18440, 94, }, /* 153 */ + { 99, 6, 12, 0, 0, 18432, 90, }, /* 154 */ + { 99, 6, 12, 0, 0, 28684, 94, }, /* 155 */ + { 99, 6, 12, 0, 0, 28688, 94, }, /* 156 */ + { 99, 6, 12, 0, 0, 18432, 96, }, /* 157 */ + { 99, 24, 12, 0, 0, 28692, 56, }, /* 158 */ + { 99, 24, 12, 0, 0, 28684, 56, }, /* 159 */ + { 29, 24, 12, 0, 0, 28672, 56, }, /* 160 */ + { 106, 12, 3, 0, 0, 26648, 98, }, /* 161 */ + { 106, 12, 3, 0, 0, 26652, 98, }, /* 162 */ + { 106, 12, 3, 0, 0, 26656, 98, }, /* 163 */ + { 106, 12, 3, 0, 0, 26660, 98, }, /* 164 */ + { 106, 12, 3, 0, 0, 26664, 98, }, /* 165 */ + { 106, 12, 3, 0, 0, 26668, 98, }, /* 166 */ + { 106, 12, 3, 0, 0, 26672, 98, }, /* 167 */ + { 106, 12, 3, 0, 0, 26676, 98, }, /* 168 */ + { 106, 12, 3, 0, 0, 26680, 98, }, /* 169 */ + { 106, 12, 3, 0, 0, 26684, 98, }, /* 170 */ + { 106, 12, 3, 
0, 0, 26688, 98, }, /* 171 */ + { 106, 12, 3, 0, 0, 26692, 98, }, /* 172 */ + { 106, 12, 3, 0, 0, 26696, 98, }, /* 173 */ + { 106, 12, 3, 0, 0, 26700, 98, }, /* 174 */ + { 106, 12, 3, 0, 0, 26704, 98, }, /* 175 */ + { 106, 12, 3, 0, 0, 26624, 98, }, /* 176 */ + { 106, 12, 3, 0, 0, 26708, 98, }, /* 177 */ + { 106, 12, 3, 0, 0, 26712, 98, }, /* 178 */ + { 106, 12, 3, 0, 0, 26716, 98, }, /* 179 */ + { 106, 12, 3, 0, 0, 26720, 98, }, /* 180 */ + { 106, 12, 3, 0, 0, 26724, 98, }, /* 181 */ + { 106, 12, 3, 0, 0, 26728, 98, }, /* 182 */ + { 106, 12, 3, 0, 0, 26732, 98, }, /* 183 */ + { 106, 12, 3, 0, 0, 26736, 98, }, /* 184 */ + { 106, 12, 3, 0, 0, 26740, 98, }, /* 185 */ + { 106, 12, 3, 21, 116, 26740, 100, }, /* 186 */ + { 106, 12, 3, 0, 0, 26624, 102, }, /* 187 */ + { 106, 12, 3, 0, 0, 26744, 104, }, /* 188 */ + { 106, 12, 3, 0, 0, 26624, 104, }, /* 189 */ + { 106, 12, 3, 0, 0, 26748, 98, }, /* 190 */ + { 106, 12, 3, 0, 0, 26752, 106, }, /* 191 */ + { 1, 9, 12, 0, 1, 18432, 76, }, /* 192 */ + { 1, 5, 12, 0, -1, 18432, 78, }, /* 193 */ + { 99, 6, 12, 0, 0, 28804, 94, }, /* 194 */ + { 1, 24, 12, 0, 0, 28804, 56, }, /* 195 */ + { 98, 2, 12, 0, 0, 18432, 0, }, /* 196 */ + { 1, 6, 12, 0, 0, 18432, 108, }, /* 197 */ + { 1, 5, 12, 0, 130, 18432, 78, }, /* 198 */ + { 99, 21, 12, 0, 0, 28672, 110, }, /* 199 */ + { 1, 9, 12, 0, 116, 18432, 76, }, /* 200 */ + { 1, 24, 12, 0, 0, 28672, 56, }, /* 201 */ + { 1, 9, 12, 0, 38, 18432, 76, }, /* 202 */ + { 99, 21, 12, 0, 0, 28672, 112, }, /* 203 */ + { 1, 9, 12, 0, 37, 18432, 76, }, /* 204 */ + { 1, 9, 12, 0, 64, 18432, 76, }, /* 205 */ + { 1, 9, 12, 0, 63, 18432, 76, }, /* 206 */ + { 1, 5, 12, 0, 7235, 18432, 78, }, /* 207 */ + { 1, 9, 12, 0, 32, 18432, 76, }, /* 208 */ + { 1, 9, 12, 34, 32, 18432, 76, }, /* 209 */ + { 1, 9, 12, 59, 32, 18432, 76, }, /* 210 */ + { 1, 9, 12, 38, 32, 18432, 76, }, /* 211 */ + { 1, 9, 12, 21, 32, 18432, 76, }, /* 212 */ + { 1, 9, 12, 51, 32, 18432, 76, }, /* 213 */ + { 1, 9, 12, 26, 32, 18432, 76, }, /* 
214 */ + { 1, 9, 12, 47, 32, 18432, 76, }, /* 215 */ + { 1, 9, 12, 55, 32, 18432, 76, }, /* 216 */ + { 1, 9, 12, 30, 32, 18432, 76, }, /* 217 */ + { 1, 9, 12, 43, 32, 18432, 76, }, /* 218 */ + { 1, 9, 12, 96, 32, 18432, 76, }, /* 219 */ + { 1, 5, 12, 0, -38, 18432, 78, }, /* 220 */ + { 1, 5, 12, 0, -37, 18432, 78, }, /* 221 */ + { 1, 5, 12, 0, 7219, 18432, 78, }, /* 222 */ + { 1, 5, 12, 0, -32, 18432, 78, }, /* 223 */ + { 1, 5, 12, 34, -32, 18432, 78, }, /* 224 */ + { 1, 5, 12, 59, -32, 18432, 78, }, /* 225 */ + { 1, 5, 12, 38, -32, 18432, 78, }, /* 226 */ + { 1, 5, 12, 21, -116, 18432, 78, }, /* 227 */ + { 1, 5, 12, 51, -32, 18432, 78, }, /* 228 */ + { 1, 5, 12, 26, -775, 18432, 78, }, /* 229 */ + { 1, 5, 12, 47, -32, 18432, 78, }, /* 230 */ + { 1, 5, 12, 55, -32, 18432, 78, }, /* 231 */ + { 1, 5, 12, 30, 1, 18432, 70, }, /* 232 */ + { 1, 5, 12, 30, -32, 18432, 78, }, /* 233 */ + { 1, 5, 12, 43, -32, 18432, 78, }, /* 234 */ + { 1, 5, 12, 96, -32, 18432, 78, }, /* 235 */ + { 1, 5, 12, 0, -64, 18432, 78, }, /* 236 */ + { 1, 5, 12, 0, -63, 18432, 78, }, /* 237 */ + { 1, 9, 12, 0, 8, 18432, 76, }, /* 238 */ + { 1, 5, 12, 34, -30, 18432, 114, }, /* 239 */ + { 1, 5, 12, 38, -25, 18432, 114, }, /* 240 */ + { 1, 9, 12, 0, 0, 18432, 116, }, /* 241 */ + { 1, 9, 12, 0, 0, 18432, 118, }, /* 242 */ + { 1, 5, 12, 43, -15, 18432, 114, }, /* 243 */ + { 1, 5, 12, 47, -22, 18432, 70, }, /* 244 */ + { 1, 5, 12, 0, -8, 18432, 78, }, /* 245 */ + { 43, 9, 12, 0, 1, 18432, 76, }, /* 246 */ + { 43, 5, 12, 0, -1, 18432, 78, }, /* 247 */ + { 1, 5, 12, 51, -54, 18432, 114, }, /* 248 */ + { 1, 5, 12, 55, -48, 18432, 114, }, /* 249 */ + { 1, 5, 12, 0, 7, 18432, 78, }, /* 250 */ + { 1, 5, 12, 0, -116, 18432, 80, }, /* 251 */ + { 1, 9, 12, 38, -60, 18432, 120, }, /* 252 */ + { 1, 5, 12, 59, -64, 18432, 114, }, /* 253 */ + { 1, 25, 12, 0, 0, 28672, 122, }, /* 254 */ + { 1, 9, 12, 0, -7, 18432, 76, }, /* 255 */ + { 1, 5, 12, 0, 0, 18432, 60, }, /* 256 */ + { 1, 9, 12, 0, -130, 18432, 76, }, /* 
257 */ + { 2, 9, 12, 0, 80, 18432, 76, }, /* 258 */ + { 2, 9, 12, 0, 32, 18432, 76, }, /* 259 */ + { 2, 9, 12, 63, 32, 18432, 76, }, /* 260 */ + { 2, 9, 12, 67, 32, 18432, 76, }, /* 261 */ + { 2, 9, 12, 71, 32, 18432, 76, }, /* 262 */ + { 2, 9, 12, 75, 32, 18432, 76, }, /* 263 */ + { 2, 9, 12, 79, 32, 18432, 76, }, /* 264 */ + { 2, 9, 12, 84, 32, 18432, 76, }, /* 265 */ + { 2, 5, 12, 0, -32, 18432, 78, }, /* 266 */ + { 2, 5, 12, 63, -32, 18432, 78, }, /* 267 */ + { 2, 5, 12, 67, -32, 18432, 78, }, /* 268 */ + { 2, 5, 12, 71, -32, 18432, 78, }, /* 269 */ + { 2, 5, 12, 75, -32, 18432, 78, }, /* 270 */ + { 2, 5, 12, 79, -32, 18432, 78, }, /* 271 */ + { 2, 5, 12, 84, -32, 18432, 78, }, /* 272 */ + { 2, 5, 12, 0, -80, 18432, 78, }, /* 273 */ + { 2, 5, 12, 0, -80, 18432, 80, }, /* 274 */ + { 2, 9, 12, 0, 1, 18432, 76, }, /* 275 */ + { 2, 5, 12, 0, -1, 18432, 78, }, /* 276 */ + { 2, 9, 12, 88, 1, 18432, 76, }, /* 277 */ + { 2, 5, 12, 88, -1, 18432, 78, }, /* 278 */ + { 2, 26, 12, 0, 0, 18432, 74, }, /* 279 */ + { 2, 12, 3, 0, 0, 26760, 98, }, /* 280 */ + { 2, 12, 3, 0, 0, 26764, 98, }, /* 281 */ + { 106, 12, 3, 0, 0, 26768, 98, }, /* 282 */ + { 2, 11, 3, 0, 0, 26624, 124, }, /* 283 */ + { 2, 9, 12, 0, 15, 18432, 76, }, /* 284 */ + { 2, 5, 12, 0, -15, 18432, 78, }, /* 285 */ + { 3, 9, 12, 0, 48, 18432, 76, }, /* 286 */ + { 3, 6, 12, 0, 0, 18432, 94, }, /* 287 */ + { 3, 21, 12, 0, 0, 18432, 74, }, /* 288 */ + { 3, 21, 12, 0, 0, 18432, 126, }, /* 289 */ + { 3, 5, 12, 0, 0, 18432, 60, }, /* 290 */ + { 3, 5, 12, 0, -48, 18432, 78, }, /* 291 */ + { 3, 5, 12, 0, 0, 18432, 70, }, /* 292 */ + { 3, 21, 12, 0, 0, 18580, 128, }, /* 293 */ + { 3, 17, 12, 0, 0, 28672, 130, }, /* 294 */ + { 3, 26, 12, 0, 0, 28672, 74, }, /* 295 */ + { 3, 23, 12, 0, 0, 14336, 74, }, /* 296 */ + { 98, 2, 12, 0, 0, 34816, 0, }, /* 297 */ + { 4, 12, 3, 0, 0, 26624, 98, }, /* 298 */ + { 4, 12, 3, 0, 0, 26624, 104, }, /* 299 */ + { 4, 12, 3, 0, 0, 26624, 132, }, /* 300 */ + { 4, 17, 12, 0, 0, 34816, 130, }, 
/* 301 */ + { 4, 21, 12, 0, 0, 34816, 74, }, /* 302 */ + { 4, 21, 12, 0, 0, 34816, 110, }, /* 303 */ + { 4, 12, 3, 0, 0, 26624, 106, }, /* 304 */ + { 4, 7, 12, 0, 0, 34816, 84, }, /* 305 */ + { 4, 21, 12, 0, 0, 34816, 126, }, /* 306 */ + { 5, 1, 4, 0, 0, 2048, 134, }, /* 307 */ + { 99, 1, 4, 0, 0, 2048, 134, }, /* 308 */ + { 5, 25, 12, 0, 0, 28672, 122, }, /* 309 */ + { 5, 25, 12, 0, 0, 0, 122, }, /* 310 */ + { 5, 21, 12, 0, 0, 14336, 74, }, /* 311 */ + { 5, 23, 12, 0, 0, 0, 74, }, /* 312 */ + { 99, 21, 12, 0, 0, 8344, 110, }, /* 313 */ + { 5, 21, 12, 0, 0, 0, 74, }, /* 314 */ + { 5, 26, 12, 0, 0, 28672, 74, }, /* 315 */ + { 5, 12, 3, 0, 0, 26624, 106, }, /* 316 */ + { 99, 21, 12, 0, 0, 152, 110, }, /* 317 */ + { 5, 1, 2, 0, 0, 156, 136, }, /* 318 */ + { 5, 21, 12, 0, 0, 0, 128, }, /* 319 */ + { 99, 21, 12, 0, 0, 160, 128, }, /* 320 */ + { 5, 7, 12, 0, 0, 0, 84, }, /* 321 */ + { 99, 6, 12, 0, 0, 164, 138, }, /* 322 */ + { 106, 12, 3, 0, 0, 26792, 132, }, /* 323 */ + { 106, 12, 3, 0, 0, 26792, 106, }, /* 324 */ + { 106, 12, 3, 0, 0, 26792, 140, }, /* 325 */ + { 5, 12, 3, 0, 0, 26624, 132, }, /* 326 */ + { 5, 12, 3, 0, 0, 26624, 142, }, /* 327 */ + { 5, 13, 12, 0, 0, 2220, 144, }, /* 328 */ + { 5, 21, 12, 0, 0, 2048, 74, }, /* 329 */ + { 5, 7, 12, 0, 0, 0, 146, }, /* 330 */ + { 5, 21, 12, 0, 0, 176, 128, }, /* 331 */ + { 5, 12, 3, 0, 0, 26624, 140, }, /* 332 */ + { 5, 12, 3, 0, 0, 26624, 98, }, /* 333 */ + { 5, 6, 12, 0, 0, 0, 94, }, /* 334 */ + { 5, 13, 12, 0, 0, 10240, 144, }, /* 335 */ + { 5, 26, 12, 0, 0, 0, 74, }, /* 336 */ + { 6, 21, 12, 0, 0, 0, 128, }, /* 337 */ + { 6, 21, 12, 0, 0, 0, 110, }, /* 338 */ + { 6, 21, 12, 0, 0, 0, 74, }, /* 339 */ + { 98, 2, 12, 0, 0, 0, 0, }, /* 340 */ + { 6, 1, 4, 0, 0, 0, 134, }, /* 341 */ + { 6, 7, 12, 0, 0, 0, 84, }, /* 342 */ + { 6, 12, 3, 0, 0, 26624, 106, }, /* 343 */ + { 6, 12, 3, 0, 0, 26624, 132, }, /* 344 */ + { 6, 12, 3, 0, 0, 26624, 98, }, /* 345 */ + { 7, 7, 12, 0, 0, 0, 84, }, /* 346 */ + { 7, 12, 3, 0, 0, 26624, 
132, }, /* 347 */ + { 48, 13, 12, 0, 0, 34816, 144, }, /* 348 */ + { 48, 7, 12, 0, 0, 34816, 84, }, /* 349 */ + { 48, 12, 3, 0, 0, 26624, 98, }, /* 350 */ + { 48, 6, 12, 0, 0, 34816, 94, }, /* 351 */ + { 48, 26, 12, 0, 0, 28672, 74, }, /* 352 */ + { 48, 21, 12, 0, 0, 28672, 74, }, /* 353 */ + { 48, 21, 12, 0, 0, 28672, 110, }, /* 354 */ + { 48, 21, 12, 0, 0, 28672, 128, }, /* 355 */ + { 48, 6, 12, 0, 0, 34816, 138, }, /* 356 */ + { 48, 12, 3, 0, 0, 26624, 104, }, /* 357 */ + { 48, 23, 12, 0, 0, 34816, 74, }, /* 358 */ + { 54, 7, 12, 0, 0, 34816, 84, }, /* 359 */ + { 54, 12, 3, 0, 0, 26624, 106, }, /* 360 */ + { 54, 12, 3, 0, 0, 26624, 98, }, /* 361 */ + { 54, 6, 12, 0, 0, 34816, 148, }, /* 362 */ + { 54, 12, 3, 0, 0, 26624, 104, }, /* 363 */ + { 54, 21, 12, 0, 0, 34816, 110, }, /* 364 */ + { 54, 21, 12, 0, 0, 34816, 74, }, /* 365 */ + { 54, 21, 12, 0, 0, 34816, 128, }, /* 366 */ + { 59, 7, 12, 0, 0, 34816, 84, }, /* 367 */ + { 59, 12, 3, 0, 0, 26624, 104, }, /* 368 */ + { 59, 21, 12, 0, 0, 34816, 110, }, /* 369 */ + { 5, 24, 12, 0, 0, 0, 126, }, /* 370 */ + { 5, 12, 3, 0, 0, 26624, 150, }, /* 371 */ + { 5, 12, 3, 0, 0, 26624, 104, }, /* 372 */ + { 5, 12, 3, 0, 0, 26624, 152, }, /* 373 */ + { 8, 12, 3, 0, 0, 26624, 106, }, /* 374 */ + { 8, 10, 5, 0, 0, 18432, 154, }, /* 375 */ + { 8, 7, 12, 0, 0, 18432, 84, }, /* 376 */ + { 8, 7, 12, 0, 0, 18432, 156, }, /* 377 */ + { 8, 12, 3, 0, 0, 26624, 98, }, /* 378 */ + { 8, 12, 3, 0, 0, 26624, 158, }, /* 379 */ + { 106, 12, 3, 0, 0, 26804, 98, }, /* 380 */ + { 106, 12, 3, 0, 0, 26808, 98, }, /* 381 */ + { 99, 21, 12, 0, 0, 18620, 128, }, /* 382 */ + { 99, 21, 12, 0, 0, 18624, 128, }, /* 383 */ + { 8, 13, 12, 0, 0, 18628, 144, }, /* 384 */ + { 8, 21, 12, 0, 0, 18432, 74, }, /* 385 */ + { 8, 6, 12, 0, 0, 18432, 94, }, /* 386 */ + { 9, 7, 12, 0, 0, 18432, 84, }, /* 387 */ + { 9, 12, 3, 0, 0, 26624, 106, }, /* 388 */ + { 9, 10, 5, 0, 0, 18432, 154, }, /* 389 */ + { 9, 7, 12, 0, 0, 18432, 156, }, /* 390 */ + { 9, 12, 3, 0, 0, 
26624, 98, }, /* 391 */ + { 9, 10, 3, 0, 0, 18432, 160, }, /* 392 */ + { 9, 12, 3, 0, 0, 26624, 158, }, /* 393 */ + { 9, 13, 12, 0, 0, 18632, 144, }, /* 394 */ + { 9, 23, 12, 0, 0, 14336, 74, }, /* 395 */ + { 9, 15, 12, 0, 0, 18432, 74, }, /* 396 */ + { 9, 26, 12, 0, 0, 18432, 74, }, /* 397 */ + { 9, 21, 12, 0, 0, 18432, 74, }, /* 398 */ + { 9, 12, 3, 0, 0, 26624, 104, }, /* 399 */ + { 10, 12, 3, 0, 0, 26624, 106, }, /* 400 */ + { 10, 10, 5, 0, 0, 18432, 154, }, /* 401 */ + { 10, 7, 12, 0, 0, 18432, 84, }, /* 402 */ + { 10, 12, 3, 0, 0, 26624, 98, }, /* 403 */ + { 10, 12, 3, 0, 0, 26624, 158, }, /* 404 */ + { 10, 13, 12, 0, 0, 18636, 144, }, /* 405 */ + { 10, 12, 3, 0, 0, 26624, 162, }, /* 406 */ + { 10, 21, 12, 0, 0, 18432, 74, }, /* 407 */ + { 11, 12, 3, 0, 0, 26624, 106, }, /* 408 */ + { 11, 10, 5, 0, 0, 18432, 154, }, /* 409 */ + { 11, 7, 12, 0, 0, 18432, 84, }, /* 410 */ + { 11, 7, 12, 0, 0, 18432, 156, }, /* 411 */ + { 11, 12, 3, 0, 0, 26624, 98, }, /* 412 */ + { 11, 12, 3, 0, 0, 26624, 158, }, /* 413 */ + { 11, 13, 12, 0, 0, 18640, 144, }, /* 414 */ + { 11, 21, 12, 0, 0, 18432, 74, }, /* 415 */ + { 11, 23, 12, 0, 0, 14336, 74, }, /* 416 */ + { 11, 12, 3, 0, 0, 26624, 162, }, /* 417 */ + { 12, 12, 3, 0, 0, 26624, 106, }, /* 418 */ + { 12, 10, 5, 0, 0, 18432, 154, }, /* 419 */ + { 12, 7, 12, 0, 0, 18432, 84, }, /* 420 */ + { 12, 7, 12, 0, 0, 18432, 156, }, /* 421 */ + { 12, 12, 3, 0, 0, 26624, 98, }, /* 422 */ + { 12, 10, 3, 0, 0, 18432, 160, }, /* 423 */ + { 12, 12, 3, 0, 0, 26624, 158, }, /* 424 */ + { 12, 12, 3, 0, 0, 26624, 164, }, /* 425 */ + { 12, 13, 12, 0, 0, 18432, 144, }, /* 426 */ + { 12, 26, 12, 0, 0, 18432, 74, }, /* 427 */ + { 12, 15, 12, 0, 0, 18432, 74, }, /* 428 */ + { 13, 12, 3, 0, 0, 26624, 106, }, /* 429 */ + { 13, 7, 12, 0, 0, 18432, 84, }, /* 430 */ + { 13, 10, 3, 0, 0, 18432, 160, }, /* 431 */ + { 13, 10, 5, 0, 0, 18432, 154, }, /* 432 */ + { 13, 12, 3, 0, 0, 26624, 158, }, /* 433 */ + { 13, 13, 12, 0, 0, 18644, 144, }, /* 434 */ + { 13, 
15, 12, 0, 0, 18644, 74, }, /* 435 */ + { 13, 26, 12, 0, 0, 28884, 74, }, /* 436 */ + { 13, 26, 12, 0, 0, 28672, 74, }, /* 437 */ + { 13, 23, 12, 0, 0, 14336, 74, }, /* 438 */ + { 14, 12, 3, 0, 0, 26624, 106, }, /* 439 */ + { 14, 10, 5, 0, 0, 18432, 154, }, /* 440 */ + { 14, 7, 12, 0, 0, 18432, 84, }, /* 441 */ + { 14, 7, 12, 0, 0, 18432, 156, }, /* 442 */ + { 14, 12, 3, 0, 0, 26624, 98, }, /* 443 */ + { 14, 12, 3, 0, 0, 26624, 158, }, /* 444 */ + { 14, 13, 12, 0, 0, 18432, 144, }, /* 445 */ + { 14, 21, 12, 0, 0, 18432, 74, }, /* 446 */ + { 14, 15, 12, 0, 0, 28672, 74, }, /* 447 */ + { 14, 26, 12, 0, 0, 18432, 74, }, /* 448 */ + { 15, 7, 12, 0, 0, 18432, 84, }, /* 449 */ + { 15, 12, 3, 0, 0, 26624, 106, }, /* 450 */ + { 15, 10, 5, 0, 0, 18432, 154, }, /* 451 */ + { 15, 21, 12, 0, 0, 18432, 74, }, /* 452 */ + { 15, 12, 3, 0, 0, 26624, 98, }, /* 453 */ + { 15, 12, 3, 0, 0, 18432, 106, }, /* 454 */ + { 15, 10, 3, 0, 0, 18432, 160, }, /* 455 */ + { 15, 12, 3, 0, 0, 26624, 158, }, /* 456 */ + { 15, 13, 12, 0, 0, 18648, 144, }, /* 457 */ + { 16, 12, 3, 0, 0, 26624, 106, }, /* 458 */ + { 16, 10, 5, 0, 0, 18432, 154, }, /* 459 */ + { 16, 7, 12, 0, 0, 18432, 84, }, /* 460 */ + { 16, 7, 12, 0, 0, 18432, 156, }, /* 461 */ + { 16, 12, 3, 0, 0, 26624, 158, }, /* 462 */ + { 16, 10, 3, 0, 0, 18432, 160, }, /* 463 */ + { 16, 7, 4, 0, 0, 18432, 84, }, /* 464 */ + { 16, 26, 12, 0, 0, 18432, 74, }, /* 465 */ + { 16, 15, 12, 0, 0, 18432, 74, }, /* 466 */ + { 16, 13, 12, 0, 0, 18432, 144, }, /* 467 */ + { 17, 12, 3, 0, 0, 26624, 106, }, /* 468 */ + { 17, 10, 5, 0, 0, 18432, 154, }, /* 469 */ + { 17, 7, 12, 0, 0, 18432, 84, }, /* 470 */ + { 17, 12, 3, 0, 0, 26624, 158, }, /* 471 */ + { 17, 10, 3, 0, 0, 18432, 160, }, /* 472 */ + { 17, 13, 12, 0, 0, 18432, 144, }, /* 473 */ + { 17, 21, 12, 0, 0, 18432, 74, }, /* 474 */ + { 18, 7, 12, 0, 0, 18432, 84, }, /* 475 */ + { 18, 12, 3, 0, 0, 26624, 106, }, /* 476 */ + { 18, 7, 5, 0, 0, 18432, 166, }, /* 477 */ + { 18, 12, 3, 0, 0, 26624, 168, }, 
/* 478 */ + { 99, 23, 12, 0, 0, 14336, 74, }, /* 479 */ + { 18, 7, 12, 0, 0, 18432, 170, }, /* 480 */ + { 18, 6, 12, 0, 0, 18432, 138, }, /* 481 */ + { 18, 12, 3, 0, 0, 26624, 98, }, /* 482 */ + { 18, 21, 12, 0, 0, 18432, 74, }, /* 483 */ + { 18, 13, 12, 0, 0, 18432, 144, }, /* 484 */ + { 18, 21, 12, 0, 0, 18432, 110, }, /* 485 */ + { 100, 7, 12, 0, 0, 18432, 84, }, /* 486 */ + { 100, 12, 3, 0, 0, 26624, 106, }, /* 487 */ + { 100, 7, 5, 0, 0, 18432, 166, }, /* 488 */ + { 100, 12, 3, 0, 0, 26624, 158, }, /* 489 */ + { 100, 7, 12, 0, 0, 18432, 170, }, /* 490 */ + { 100, 6, 12, 0, 0, 18432, 138, }, /* 491 */ + { 100, 12, 3, 0, 0, 26624, 98, }, /* 492 */ + { 100, 12, 3, 0, 0, 26624, 104, }, /* 493 */ + { 100, 13, 12, 0, 0, 18432, 144, }, /* 494 */ + { 19, 7, 12, 0, 0, 18432, 84, }, /* 495 */ + { 19, 26, 12, 0, 0, 18432, 74, }, /* 496 */ + { 19, 21, 12, 0, 0, 18432, 74, }, /* 497 */ + { 19, 21, 12, 0, 0, 18432, 110, }, /* 498 */ + { 19, 12, 3, 0, 0, 26624, 98, }, /* 499 */ + { 19, 13, 12, 0, 0, 18432, 144, }, /* 500 */ + { 19, 15, 12, 0, 0, 18432, 74, }, /* 501 */ + { 19, 22, 12, 0, 0, 28672, 172, }, /* 502 */ + { 19, 18, 12, 0, 0, 28672, 172, }, /* 503 */ + { 19, 10, 5, 0, 0, 18432, 174, }, /* 504 */ + { 19, 12, 3, 0, 0, 26624, 106, }, /* 505 */ + { 19, 12, 3, 0, 0, 26624, 176, }, /* 506 */ + { 19, 10, 5, 0, 0, 18432, 154, }, /* 507 */ + { 19, 12, 3, 0, 0, 26624, 132, }, /* 508 */ + { 19, 12, 3, 0, 0, 26624, 158, }, /* 509 */ + { 99, 26, 12, 0, 0, 18432, 74, }, /* 510 */ + { 20, 7, 12, 0, 0, 18432, 84, }, /* 511 */ + { 20, 10, 12, 0, 0, 18432, 154, }, /* 512 */ + { 20, 12, 3, 0, 0, 26624, 106, }, /* 513 */ + { 20, 10, 5, 0, 0, 18432, 154, }, /* 514 */ + { 20, 12, 3, 0, 0, 26624, 98, }, /* 515 */ + { 20, 12, 3, 0, 0, 26624, 158, }, /* 516 */ + { 20, 13, 12, 0, 0, 18652, 144, }, /* 517 */ + { 20, 21, 12, 0, 0, 18432, 128, }, /* 518 */ + { 20, 21, 12, 0, 0, 18432, 74, }, /* 519 */ + { 20, 10, 12, 0, 0, 18432, 178, }, /* 520 */ + { 20, 12, 3, 0, 0, 26624, 132, }, /* 521 */ 
+ { 20, 13, 12, 0, 0, 18432, 144, }, /* 522 */ + { 20, 26, 12, 0, 0, 18432, 74, }, /* 523 */ + { 21, 9, 12, 0, 7264, 18432, 76, }, /* 524 */ + { 21, 5, 12, 0, 3008, 18432, 180, }, /* 525 */ + { 99, 21, 12, 0, 0, 18656, 74, }, /* 526 */ + { 21, 6, 12, 0, 0, 18432, 182, }, /* 527 */ + { 22, 7, 6, 0, 0, 18432, 84, }, /* 528 */ + { 22, 7, 6, 0, 0, 18432, 184, }, /* 529 */ + { 22, 7, 7, 0, 0, 18432, 184, }, /* 530 */ + { 22, 7, 7, 0, 0, 18432, 84, }, /* 531 */ + { 22, 7, 8, 0, 0, 18432, 84, }, /* 532 */ + { 23, 7, 12, 0, 0, 18432, 84, }, /* 533 */ + { 23, 12, 3, 0, 0, 26624, 98, }, /* 534 */ + { 23, 21, 12, 0, 0, 18432, 74, }, /* 535 */ + { 23, 21, 12, 0, 0, 18432, 110, }, /* 536 */ + { 23, 21, 12, 0, 0, 18432, 128, }, /* 537 */ + { 23, 15, 12, 0, 0, 18432, 144, }, /* 538 */ + { 23, 15, 12, 0, 0, 18432, 74, }, /* 539 */ + { 23, 26, 12, 0, 0, 28672, 74, }, /* 540 */ + { 24, 9, 12, 0, 38864, 18432, 186, }, /* 541 */ + { 24, 9, 12, 0, 8, 18432, 186, }, /* 542 */ + { 24, 5, 12, 0, -8, 18432, 70, }, /* 543 */ + { 101, 17, 12, 0, 0, 28672, 130, }, /* 544 */ + { 101, 7, 12, 0, 0, 18432, 84, }, /* 545 */ + { 101, 26, 12, 0, 0, 18432, 74, }, /* 546 */ + { 101, 21, 12, 0, 0, 18432, 128, }, /* 547 */ + { 102, 29, 12, 0, 0, 45056, 52, }, /* 548 */ + { 102, 7, 12, 0, 0, 18432, 84, }, /* 549 */ + { 102, 22, 12, 0, 0, 28672, 172, }, /* 550 */ + { 102, 18, 12, 0, 0, 28672, 172, }, /* 551 */ + { 25, 7, 12, 0, 0, 18432, 84, }, /* 552 */ + { 99, 21, 12, 0, 0, 18660, 110, }, /* 553 */ + { 25, 14, 12, 0, 0, 18432, 84, }, /* 554 */ + { 33, 7, 12, 0, 0, 18432, 84, }, /* 555 */ + { 33, 12, 3, 0, 0, 26624, 106, }, /* 556 */ + { 33, 12, 3, 0, 0, 26624, 158, }, /* 557 */ + { 33, 10, 3, 0, 0, 18432, 188, }, /* 558 */ + { 34, 7, 12, 0, 0, 18432, 84, }, /* 559 */ + { 34, 12, 3, 0, 0, 26624, 106, }, /* 560 */ + { 34, 10, 3, 0, 0, 18432, 188, }, /* 561 */ + { 99, 21, 12, 0, 0, 18664, 128, }, /* 562 */ + { 35, 7, 12, 0, 0, 18432, 84, }, /* 563 */ + { 35, 12, 3, 0, 0, 26624, 106, }, /* 564 */ + { 36, 7, 
12, 0, 0, 18432, 84, }, /* 565 */ + { 36, 12, 3, 0, 0, 26624, 106, }, /* 566 */ + { 103, 7, 12, 0, 0, 18432, 84, }, /* 567 */ + { 103, 7, 12, 0, 0, 18432, 146, }, /* 568 */ + { 103, 12, 3, 0, 0, 26624, 102, }, /* 569 */ + { 103, 10, 5, 0, 0, 18432, 154, }, /* 570 */ + { 103, 12, 3, 0, 0, 26624, 106, }, /* 571 */ + { 103, 12, 3, 0, 0, 26624, 98, }, /* 572 */ + { 103, 12, 3, 0, 0, 26624, 158, }, /* 573 */ + { 103, 21, 12, 0, 0, 18432, 128, }, /* 574 */ + { 103, 21, 12, 0, 0, 18432, 110, }, /* 575 */ + { 103, 6, 12, 0, 0, 18432, 148, }, /* 576 */ + { 103, 21, 12, 0, 0, 18432, 74, }, /* 577 */ + { 103, 23, 12, 0, 0, 14336, 74, }, /* 578 */ + { 103, 13, 12, 0, 0, 18432, 144, }, /* 579 */ + { 103, 15, 12, 0, 0, 28672, 74, }, /* 580 */ + { 26, 21, 12, 0, 0, 28672, 74, }, /* 581 */ + { 99, 21, 12, 0, 0, 28908, 110, }, /* 582 */ + { 99, 21, 12, 0, 0, 28908, 128, }, /* 583 */ + { 26, 21, 12, 0, 0, 28672, 110, }, /* 584 */ + { 26, 17, 12, 0, 0, 28672, 130, }, /* 585 */ + { 26, 21, 12, 0, 0, 28672, 128, }, /* 586 */ + { 26, 21, 12, 0, 0, 28672, 190, }, /* 587 */ + { 26, 12, 3, 0, 0, 26624, 192, }, /* 588 */ + { 26, 1, 2, 0, 0, 6144, 66, }, /* 589 */ + { 26, 13, 12, 0, 0, 18432, 144, }, /* 590 */ + { 26, 7, 12, 0, 0, 18432, 84, }, /* 591 */ + { 26, 6, 12, 0, 0, 18432, 138, }, /* 592 */ + { 26, 12, 3, 0, 0, 26624, 194, }, /* 593 */ + { 26, 12, 3, 0, 0, 26624, 106, }, /* 594 */ + { 37, 7, 12, 0, 0, 18432, 84, }, /* 595 */ + { 37, 12, 3, 0, 0, 26624, 106, }, /* 596 */ + { 37, 10, 5, 0, 0, 18432, 154, }, /* 597 */ + { 37, 12, 3, 0, 0, 26624, 98, }, /* 598 */ + { 37, 26, 12, 0, 0, 28672, 74, }, /* 599 */ + { 37, 21, 12, 0, 0, 28672, 128, }, /* 600 */ + { 37, 13, 12, 0, 0, 18432, 144, }, /* 601 */ + { 38, 7, 12, 0, 0, 18432, 84, }, /* 602 */ + { 110, 7, 12, 0, 0, 18432, 84, }, /* 603 */ + { 110, 7, 12, 0, 0, 18432, 170, }, /* 604 */ + { 110, 13, 12, 0, 0, 18432, 144, }, /* 605 */ + { 110, 15, 12, 0, 0, 18432, 144, }, /* 606 */ + { 110, 26, 12, 0, 0, 28672, 74, }, /* 607 */ + { 103, 
26, 12, 0, 0, 28672, 74, }, /* 608 */ + { 42, 7, 12, 0, 0, 18432, 84, }, /* 609 */ + { 42, 12, 3, 0, 0, 26624, 106, }, /* 610 */ + { 42, 10, 5, 0, 0, 18432, 154, }, /* 611 */ + { 42, 21, 12, 0, 0, 18432, 74, }, /* 612 */ + { 123, 7, 12, 0, 0, 18432, 84, }, /* 613 */ + { 123, 10, 5, 0, 0, 18432, 154, }, /* 614 */ + { 123, 12, 3, 0, 0, 26624, 106, }, /* 615 */ + { 123, 12, 3, 0, 0, 26624, 158, }, /* 616 */ + { 123, 10, 12, 0, 0, 18432, 154, }, /* 617 */ + { 123, 12, 3, 0, 0, 26624, 98, }, /* 618 */ + { 123, 13, 12, 0, 0, 18432, 144, }, /* 619 */ + { 123, 21, 12, 0, 0, 18432, 74, }, /* 620 */ + { 123, 6, 12, 0, 0, 18432, 138, }, /* 621 */ + { 123, 21, 12, 0, 0, 18432, 128, }, /* 622 */ + { 106, 11, 3, 0, 0, 26624, 196, }, /* 623 */ + { 106, 12, 3, 0, 0, 26624, 106, }, /* 624 */ + { 113, 12, 3, 0, 0, 26624, 106, }, /* 625 */ + { 113, 10, 5, 0, 0, 18432, 154, }, /* 626 */ + { 113, 7, 12, 0, 0, 18432, 84, }, /* 627 */ + { 113, 12, 3, 0, 0, 26624, 98, }, /* 628 */ + { 113, 10, 3, 0, 0, 18432, 160, }, /* 629 */ + { 113, 10, 3, 0, 0, 18432, 188, }, /* 630 */ + { 113, 21, 12, 0, 0, 18432, 128, }, /* 631 */ + { 113, 13, 12, 0, 0, 18432, 144, }, /* 632 */ + { 113, 21, 12, 0, 0, 18432, 74, }, /* 633 */ + { 113, 21, 12, 0, 0, 18432, 110, }, /* 634 */ + { 113, 26, 12, 0, 0, 18432, 74, }, /* 635 */ + { 116, 12, 3, 0, 0, 26624, 106, }, /* 636 */ + { 116, 10, 5, 0, 0, 18432, 154, }, /* 637 */ + { 116, 7, 12, 0, 0, 18432, 84, }, /* 638 */ + { 116, 10, 3, 0, 0, 18432, 188, }, /* 639 */ + { 116, 12, 3, 0, 0, 26624, 158, }, /* 640 */ + { 116, 13, 12, 0, 0, 18432, 144, }, /* 641 */ + { 132, 7, 12, 0, 0, 18432, 84, }, /* 642 */ + { 132, 12, 3, 0, 0, 26624, 98, }, /* 643 */ + { 132, 10, 5, 0, 0, 18432, 154, }, /* 644 */ + { 132, 12, 3, 0, 0, 26624, 106, }, /* 645 */ + { 132, 10, 3, 0, 0, 18432, 188, }, /* 646 */ + { 132, 21, 12, 0, 0, 18432, 74, }, /* 647 */ + { 117, 7, 12, 0, 0, 18432, 84, }, /* 648 */ + { 117, 10, 5, 0, 0, 18432, 154, }, /* 649 */ + { 117, 12, 3, 0, 0, 26624, 106, }, /* 
650 */ + { 117, 12, 3, 0, 0, 26624, 198, }, /* 651 */ + { 117, 12, 3, 0, 0, 26624, 98, }, /* 652 */ + { 117, 21, 12, 0, 0, 18432, 128, }, /* 653 */ + { 117, 21, 12, 0, 0, 18432, 110, }, /* 654 */ + { 117, 13, 12, 0, 0, 18432, 144, }, /* 655 */ + { 118, 13, 12, 0, 0, 18432, 144, }, /* 656 */ + { 118, 7, 12, 0, 0, 18432, 84, }, /* 657 */ + { 118, 6, 12, 0, 0, 18432, 94, }, /* 658 */ + { 118, 6, 12, 0, 0, 18432, 96, }, /* 659 */ + { 118, 21, 12, 0, 0, 18432, 128, }, /* 660 */ + { 2, 5, 12, 63, -6222, 18432, 70, }, /* 661 */ + { 2, 5, 12, 67, -6221, 18432, 70, }, /* 662 */ + { 2, 5, 12, 71, -6212, 18432, 70, }, /* 663 */ + { 2, 5, 12, 75, -6210, 18432, 70, }, /* 664 */ + { 2, 5, 12, 79, -6210, 18432, 70, }, /* 665 */ + { 2, 5, 12, 79, -6211, 18432, 70, }, /* 666 */ + { 2, 5, 12, 84, -6204, 18432, 70, }, /* 667 */ + { 2, 5, 12, 88, -6180, 18432, 70, }, /* 668 */ + { 2, 5, 12, 108, 35267, 18432, 70, }, /* 669 */ + { 21, 9, 12, 0, -3008, 18432, 76, }, /* 670 */ + { 116, 21, 12, 0, 0, 18432, 74, }, /* 671 */ + { 106, 12, 3, 0, 0, 26864, 98, }, /* 672 */ + { 106, 12, 3, 0, 0, 26868, 98, }, /* 673 */ + { 99, 21, 12, 0, 0, 18680, 200, }, /* 674 */ + { 106, 12, 3, 0, 0, 26876, 98, }, /* 675 */ + { 106, 12, 3, 0, 0, 26880, 98, }, /* 676 */ + { 106, 12, 3, 0, 0, 26884, 98, }, /* 677 */ + { 99, 10, 5, 0, 0, 18684, 174, }, /* 678 */ + { 99, 7, 12, 0, 0, 18696, 84, }, /* 679 */ + { 99, 7, 12, 0, 0, 18684, 84, }, /* 680 */ + { 99, 7, 12, 0, 0, 18676, 84, }, /* 681 */ + { 99, 7, 12, 0, 0, 18700, 84, }, /* 682 */ + { 99, 7, 12, 0, 0, 18704, 84, }, /* 683 */ + { 106, 12, 3, 0, 0, 26900, 98, }, /* 684 */ + { 99, 10, 5, 0, 0, 18712, 174, }, /* 685 */ + { 106, 12, 3, 0, 0, 26896, 98, }, /* 686 */ + { 99, 7, 12, 0, 0, 18716, 84, }, /* 687 */ + { 2, 5, 12, 0, 0, 18432, 60, }, /* 688 */ + { 1, 6, 12, 0, 0, 18432, 90, }, /* 689 */ + { 2, 6, 12, 0, 0, 18432, 182, }, /* 690 */ + { 0, 5, 12, 0, 35332, 18432, 78, }, /* 691 */ + { 0, 5, 12, 0, 3814, 18432, 78, }, /* 692 */ + { 0, 5, 12, 0, 35384, 
18432, 78, }, /* 693 */ + { 0, 5, 12, 0, 0, 18432, 202, }, /* 694 */ + { 0, 6, 12, 0, 0, 18432, 182, }, /* 695 */ + { 0, 6, 12, 0, 0, 18432, 204, }, /* 696 */ + { 1, 6, 12, 0, 0, 18432, 182, }, /* 697 */ + { 106, 12, 3, 0, 0, 26740, 104, }, /* 698 */ + { 106, 12, 3, 0, 0, 26912, 98, }, /* 699 */ + { 106, 12, 3, 0, 0, 26916, 98, }, /* 700 */ + { 0, 9, 12, 92, 1, 18432, 76, }, /* 701 */ + { 0, 5, 12, 92, -1, 18432, 78, }, /* 702 */ + { 0, 5, 12, 0, 0, 18432, 70, }, /* 703 */ + { 0, 5, 12, 92, -58, 18432, 70, }, /* 704 */ + { 0, 9, 12, 0, -7615, 18432, 76, }, /* 705 */ + { 1, 5, 12, 0, 8, 18432, 78, }, /* 706 */ + { 1, 9, 12, 0, -8, 18432, 76, }, /* 707 */ + { 1, 5, 12, 0, 0, 18432, 78, }, /* 708 */ + { 1, 5, 12, 0, 74, 18432, 78, }, /* 709 */ + { 1, 5, 12, 0, 86, 18432, 78, }, /* 710 */ + { 1, 5, 12, 0, 100, 18432, 78, }, /* 711 */ + { 1, 5, 12, 0, 128, 18432, 78, }, /* 712 */ + { 1, 5, 12, 0, 112, 18432, 78, }, /* 713 */ + { 1, 5, 12, 0, 126, 18432, 78, }, /* 714 */ + { 1, 5, 12, 0, 8, 18432, 70, }, /* 715 */ + { 1, 8, 12, 0, -8, 18432, 88, }, /* 716 */ + { 1, 5, 12, 0, 0, 18432, 70, }, /* 717 */ + { 1, 5, 12, 0, 9, 18432, 70, }, /* 718 */ + { 1, 9, 12, 0, -74, 18432, 76, }, /* 719 */ + { 1, 8, 12, 0, -9, 18432, 88, }, /* 720 */ + { 1, 5, 12, 21, -7173, 18432, 78, }, /* 721 */ + { 1, 9, 12, 0, -86, 18432, 76, }, /* 722 */ + { 1, 5, 12, 0, -7235, 18432, 78, }, /* 723 */ + { 1, 9, 12, 0, -100, 18432, 76, }, /* 724 */ + { 1, 5, 12, 0, -7219, 18432, 78, }, /* 725 */ + { 1, 9, 12, 0, -112, 18432, 76, }, /* 726 */ + { 1, 9, 12, 0, -128, 18432, 76, }, /* 727 */ + { 1, 9, 12, 0, -126, 18432, 76, }, /* 728 */ + { 99, 29, 12, 0, 0, 45056, 52, }, /* 729 */ + { 106, 1, 3, 0, 0, 6144, 206, }, /* 730 */ + { 106, 1, 13, 0, 0, 6144, 208, }, /* 731 */ + { 99, 1, 2, 0, 0, 18432, 210, }, /* 732 */ + { 99, 1, 2, 0, 0, 34816, 210, }, /* 733 */ + { 99, 17, 12, 0, 0, 28672, 212, }, /* 734 */ + { 99, 21, 12, 0, 0, 28672, 64, }, /* 735 */ + { 99, 20, 12, 0, 0, 28672, 214, }, /* 736 */ + { 
99, 19, 12, 0, 0, 28672, 214, }, /* 737 */ + { 99, 22, 12, 0, 0, 28672, 216, }, /* 738 */ + { 99, 20, 12, 0, 0, 28672, 216, }, /* 739 */ + { 99, 19, 12, 0, 0, 28672, 216, }, /* 740 */ + { 99, 21, 12, 0, 0, 28672, 218, }, /* 741 */ + { 99, 21, 12, 0, 0, 28672, 220, }, /* 742 */ + { 99, 27, 2, 0, 0, 45056, 50, }, /* 743 */ + { 99, 28, 2, 0, 0, 4096, 50, }, /* 744 */ + { 99, 1, 2, 0, 0, 20480, 136, }, /* 745 */ + { 99, 1, 2, 0, 0, 36864, 136, }, /* 746 */ + { 99, 1, 2, 0, 0, 30720, 136, }, /* 747 */ + { 99, 1, 2, 0, 0, 24576, 136, }, /* 748 */ + { 99, 1, 2, 0, 0, 40960, 136, }, /* 749 */ + { 99, 29, 12, 0, 0, 8488, 52, }, /* 750 */ + { 99, 21, 12, 0, 0, 14336, 54, }, /* 751 */ + { 99, 21, 12, 0, 0, 14336, 64, }, /* 752 */ + { 99, 21, 14, 0, 0, 28672, 222, }, /* 753 */ + { 99, 21, 12, 0, 0, 28672, 224, }, /* 754 */ + { 99, 16, 12, 0, 0, 28672, 144, }, /* 755 */ + { 99, 16, 12, 0, 0, 28672, 226, }, /* 756 */ + { 99, 25, 12, 0, 0, 8192, 64, }, /* 757 */ + { 99, 22, 12, 0, 0, 28672, 228, }, /* 758 */ + { 99, 18, 12, 0, 0, 28672, 228, }, /* 759 */ + { 99, 21, 12, 0, 0, 28972, 54, }, /* 760 */ + { 99, 21, 12, 0, 0, 28672, 212, }, /* 761 */ + { 99, 21, 12, 0, 0, 28976, 54, }, /* 762 */ + { 99, 21, 12, 0, 0, 28980, 54, }, /* 763 */ + { 99, 1, 2, 0, 0, 6144, 230, }, /* 764 */ + { 98, 2, 2, 0, 0, 6144, 232, }, /* 765 */ + { 99, 1, 2, 0, 0, 22528, 136, }, /* 766 */ + { 99, 1, 2, 0, 0, 38912, 136, }, /* 767 */ + { 99, 1, 2, 0, 0, 16384, 136, }, /* 768 */ + { 99, 1, 2, 0, 0, 32768, 136, }, /* 769 */ + { 99, 1, 2, 0, 0, 6144, 234, }, /* 770 */ + { 99, 25, 12, 0, 0, 12288, 236, }, /* 771 */ + { 99, 25, 12, 0, 0, 12288, 238, }, /* 772 */ + { 99, 25, 12, 0, 0, 28672, 236, }, /* 773 */ + { 99, 22, 12, 0, 0, 28672, 240, }, /* 774 */ + { 99, 18, 12, 0, 0, 28672, 240, }, /* 775 */ + { 98, 2, 12, 0, 0, 14336, 0, }, /* 776 */ + { 106, 12, 3, 0, 0, 26624, 242, }, /* 777 */ + { 106, 11, 3, 0, 0, 26624, 124, }, /* 778 */ + { 106, 11, 3, 0, 0, 26624, 244, }, /* 779 */ + { 106, 12, 3, 0, 0, 
26936, 104, }, /* 780 */ + { 99, 26, 12, 0, 0, 28672, 74, }, /* 781 */ + { 99, 9, 12, 0, 0, 18432, 116, }, /* 782 */ + { 99, 5, 12, 0, 0, 18432, 246, }, /* 783 */ + { 99, 25, 12, 0, 0, 28672, 248, }, /* 784 */ + { 99, 26, 14, 0, 0, 28672, 250, }, /* 785 */ + { 1, 9, 12, 96, -7517, 18432, 76, }, /* 786 */ + { 99, 26, 12, 0, 0, 28672, 122, }, /* 787 */ + { 0, 9, 12, 100, 0, 18432, 76, }, /* 788 */ + { 0, 9, 12, 104, -8262, 18432, 76, }, /* 789 */ + { 99, 26, 12, 0, 0, 14336, 252, }, /* 790 */ + { 0, 9, 12, 0, 28, 18432, 76, }, /* 791 */ + { 99, 7, 12, 0, 0, 18432, 254, }, /* 792 */ + { 99, 5, 14, 0, 0, 18432, 256, }, /* 793 */ + { 99, 25, 12, 0, 0, 28672, 122, }, /* 794 */ + { 99, 5, 12, 0, 0, 18432, 258, }, /* 795 */ + { 0, 5, 12, 0, -28, 18432, 78, }, /* 796 */ + { 0, 14, 12, 0, 16, 18432, 76, }, /* 797 */ + { 0, 14, 12, 0, -16, 18432, 78, }, /* 798 */ + { 0, 14, 12, 0, 0, 18432, 84, }, /* 799 */ + { 99, 25, 14, 0, 0, 28672, 260, }, /* 800 */ + { 99, 26, 14, 0, 0, 28672, 260, }, /* 801 */ + { 99, 26, 12, 0, 0, 28672, 64, }, /* 802 */ + { 99, 25, 12, 0, 0, 28672, 262, }, /* 803 */ + { 99, 25, 12, 0, 0, 28672, 264, }, /* 804 */ + { 99, 25, 12, 0, 0, 12288, 266, }, /* 805 */ + { 99, 22, 12, 0, 0, 28672, 264, }, /* 806 */ + { 99, 18, 12, 0, 0, 28672, 264, }, /* 807 */ + { 99, 26, 14, 0, 0, 28672, 268, }, /* 808 */ + { 99, 22, 12, 0, 0, 28672, 270, }, /* 809 */ + { 99, 18, 12, 0, 0, 28672, 270, }, /* 810 */ + { 99, 26, 12, 0, 0, 18432, 54, }, /* 811 */ + { 99, 26, 14, 0, 0, 28672, 272, }, /* 812 */ + { 98, 2, 12, 0, 0, 18432, 274, }, /* 813 */ + { 99, 15, 12, 0, 0, 10240, 74, }, /* 814 */ + { 99, 26, 12, 0, 26, 18432, 276, }, /* 815 */ + { 99, 26, 14, 0, 26, 18432, 278, }, /* 816 */ + { 99, 26, 12, 0, -26, 18432, 280, }, /* 817 */ + { 99, 25, 14, 0, 0, 28672, 282, }, /* 818 */ + { 99, 26, 14, 0, 0, 28672, 284, }, /* 819 */ + { 99, 26, 14, 0, 0, 28672, 286, }, /* 820 */ + { 99, 25, 14, 0, 0, 28672, 284, }, /* 821 */ + { 99, 26, 14, 0, 0, 18432, 272, }, /* 822 */ + { 99, 
26, 14, 0, 0, 28672, 288, }, /* 823 */ + { 109, 26, 12, 0, 0, 18432, 54, }, /* 824 */ + { 99, 26, 12, 0, 0, 28672, 228, }, /* 825 */ + { 44, 9, 12, 0, 48, 18432, 76, }, /* 826 */ + { 44, 5, 12, 0, -48, 18432, 78, }, /* 827 */ + { 0, 9, 12, 0, -10743, 18432, 76, }, /* 828 */ + { 0, 9, 12, 0, -3814, 18432, 76, }, /* 829 */ + { 0, 9, 12, 0, -10727, 18432, 76, }, /* 830 */ + { 0, 5, 12, 0, -10795, 18432, 78, }, /* 831 */ + { 0, 5, 12, 0, -10792, 18432, 78, }, /* 832 */ + { 0, 9, 12, 0, -10780, 18432, 76, }, /* 833 */ + { 0, 9, 12, 0, -10749, 18432, 76, }, /* 834 */ + { 0, 9, 12, 0, -10783, 18432, 76, }, /* 835 */ + { 0, 9, 12, 0, -10782, 18432, 76, }, /* 836 */ + { 0, 9, 12, 0, -10815, 18432, 76, }, /* 837 */ + { 43, 5, 12, 0, 0, 18432, 60, }, /* 838 */ + { 43, 26, 12, 0, 0, 28672, 74, }, /* 839 */ + { 43, 12, 3, 0, 0, 26624, 98, }, /* 840 */ + { 43, 21, 12, 0, 0, 28672, 128, }, /* 841 */ + { 43, 21, 12, 0, 0, 28672, 74, }, /* 842 */ + { 43, 15, 12, 0, 0, 28672, 74, }, /* 843 */ + { 21, 5, 12, 0, -7264, 18432, 78, }, /* 844 */ + { 45, 7, 12, 0, 0, 18432, 84, }, /* 845 */ + { 45, 6, 12, 0, 0, 18432, 148, }, /* 846 */ + { 45, 21, 12, 0, 0, 18432, 74, }, /* 847 */ + { 45, 12, 3, 0, 0, 26624, 290, }, /* 848 */ + { 2, 12, 3, 0, 0, 26624, 106, }, /* 849 */ + { 99, 20, 12, 0, 0, 28672, 228, }, /* 850 */ + { 99, 19, 12, 0, 0, 28672, 228, }, /* 851 */ + { 99, 17, 12, 0, 0, 28988, 212, }, /* 852 */ + { 99, 6, 12, 0, 0, 28672, 292, }, /* 853 */ + { 99, 21, 12, 0, 0, 28992, 54, }, /* 854 */ + { 99, 21, 12, 0, 0, 28996, 54, }, /* 855 */ + { 99, 21, 12, 0, 0, 29000, 224, }, /* 856 */ + { 99, 21, 12, 0, 0, 29004, 294, }, /* 857 */ + { 99, 21, 12, 0, 0, 28812, 54, }, /* 858 */ + { 99, 21, 12, 0, 0, 28672, 294, }, /* 859 */ + { 30, 26, 12, 0, 0, 28672, 296, }, /* 860 */ + { 99, 26, 12, 0, 0, 29008, 298, }, /* 861 */ + { 99, 26, 12, 0, 0, 29008, 300, }, /* 862 */ + { 99, 26, 12, 0, 0, 29008, 302, }, /* 863 */ + { 99, 21, 12, 0, 0, 29012, 294, }, /* 864 */ + { 99, 21, 12, 0, 0, 29016, 
224, }, /* 865 */ + { 99, 21, 12, 0, 0, 29020, 54, }, /* 866 */ + { 30, 6, 12, 0, 0, 18432, 138, }, /* 867 */ + { 99, 7, 12, 0, 0, 18784, 304, }, /* 868 */ + { 30, 14, 12, 0, 0, 18432, 304, }, /* 869 */ + { 99, 22, 12, 0, 0, 29028, 228, }, /* 870 */ + { 99, 18, 12, 0, 0, 29028, 228, }, /* 871 */ + { 99, 22, 12, 0, 0, 29032, 228, }, /* 872 */ + { 99, 18, 12, 0, 0, 29032, 228, }, /* 873 */ + { 99, 22, 12, 0, 0, 29036, 62, }, /* 874 */ + { 99, 18, 12, 0, 0, 29036, 62, }, /* 875 */ + { 99, 22, 12, 0, 0, 29036, 228, }, /* 876 */ + { 99, 18, 12, 0, 0, 29036, 228, }, /* 877 */ + { 99, 26, 12, 0, 0, 29020, 54, }, /* 878 */ + { 99, 17, 12, 0, 0, 29020, 212, }, /* 879 */ + { 99, 22, 12, 0, 0, 29020, 216, }, /* 880 */ + { 99, 18, 12, 0, 0, 29020, 216, }, /* 881 */ + { 106, 12, 3, 0, 0, 26992, 98, }, /* 882 */ + { 22, 10, 3, 0, 0, 18432, 306, }, /* 883 */ + { 99, 17, 14, 0, 0, 29020, 308, }, /* 884 */ + { 99, 6, 12, 0, 0, 18804, 138, }, /* 885 */ + { 99, 26, 12, 0, 0, 29020, 74, }, /* 886 */ + { 30, 6, 12, 0, 0, 18432, 148, }, /* 887 */ + { 99, 7, 12, 0, 0, 18808, 84, }, /* 888 */ + { 99, 21, 14, 0, 0, 29048, 250, }, /* 889 */ + { 99, 26, 12, 0, 0, 29024, 74, }, /* 890 */ + { 27, 7, 12, 0, 0, 18432, 84, }, /* 891 */ + { 106, 12, 3, 0, 0, 26996, 98, }, /* 892 */ + { 99, 24, 12, 0, 0, 29044, 310, }, /* 893 */ + { 27, 6, 12, 0, 0, 18432, 138, }, /* 894 */ + { 99, 17, 12, 0, 0, 29044, 130, }, /* 895 */ + { 28, 7, 12, 0, 0, 18432, 84, }, /* 896 */ + { 99, 21, 12, 0, 0, 29036, 144, }, /* 897 */ + { 99, 6, 12, 0, 0, 18804, 96, }, /* 898 */ + { 28, 6, 12, 0, 0, 18432, 138, }, /* 899 */ + { 29, 7, 12, 0, 0, 18432, 84, }, /* 900 */ + { 22, 7, 12, 0, 0, 18432, 84, }, /* 901 */ + { 22, 7, 12, 0, 0, 18432, 184, }, /* 902 */ + { 99, 26, 12, 0, 0, 18784, 74, }, /* 903 */ + { 99, 15, 12, 0, 0, 18784, 74, }, /* 904 */ + { 22, 26, 12, 0, 0, 18432, 74, }, /* 905 */ + { 22, 26, 12, 0, 0, 28672, 74, }, /* 906 */ + { 99, 15, 12, 0, 0, 18432, 74, }, /* 907 */ + { 99, 26, 14, 0, 0, 18784, 250, }, /* 
908 */ + { 28, 26, 12, 0, 0, 18432, 74, }, /* 909 */ + { 30, 7, 12, 0, 0, 18432, 312, }, /* 910 */ + { 31, 7, 12, 0, 0, 18432, 84, }, /* 911 */ + { 31, 6, 12, 0, 0, 18432, 138, }, /* 912 */ + { 31, 26, 12, 0, 0, 28672, 74, }, /* 913 */ + { 55, 7, 12, 0, 0, 18432, 84, }, /* 914 */ + { 55, 6, 12, 0, 0, 18432, 148, }, /* 915 */ + { 55, 21, 12, 0, 0, 18432, 110, }, /* 916 */ + { 55, 21, 12, 0, 0, 18432, 128, }, /* 917 */ + { 119, 7, 12, 0, 0, 18432, 84, }, /* 918 */ + { 119, 6, 12, 0, 0, 18432, 138, }, /* 919 */ + { 119, 21, 12, 0, 0, 28672, 110, }, /* 920 */ + { 119, 21, 12, 0, 0, 28672, 128, }, /* 921 */ + { 119, 13, 12, 0, 0, 18432, 144, }, /* 922 */ + { 2, 9, 12, 108, 1, 18432, 76, }, /* 923 */ + { 2, 5, 12, 108, -35267, 18432, 78, }, /* 924 */ + { 2, 7, 12, 0, 0, 18432, 84, }, /* 925 */ + { 2, 21, 12, 0, 0, 28672, 74, }, /* 926 */ + { 2, 12, 3, 0, 0, 26624, 98, }, /* 927 */ + { 2, 6, 12, 0, 0, 28672, 94, }, /* 928 */ + { 2, 6, 12, 0, 0, 18432, 90, }, /* 929 */ + { 126, 7, 12, 0, 0, 18432, 84, }, /* 930 */ + { 126, 14, 12, 0, 0, 18432, 84, }, /* 931 */ + { 126, 12, 3, 0, 0, 26624, 98, }, /* 932 */ + { 126, 21, 12, 0, 0, 18432, 74, }, /* 933 */ + { 126, 21, 12, 0, 0, 18432, 128, }, /* 934 */ + { 126, 21, 12, 0, 0, 18432, 110, }, /* 935 */ + { 99, 24, 12, 0, 0, 29052, 56, }, /* 936 */ + { 0, 9, 12, 0, -35332, 18432, 76, }, /* 937 */ + { 99, 24, 12, 0, 0, 18432, 56, }, /* 938 */ + { 0, 9, 12, 0, -42280, 18432, 76, }, /* 939 */ + { 0, 5, 12, 0, 48, 18432, 78, }, /* 940 */ + { 0, 9, 12, 0, -42308, 18432, 76, }, /* 941 */ + { 0, 9, 12, 0, -42319, 18432, 76, }, /* 942 */ + { 0, 9, 12, 0, -42315, 18432, 76, }, /* 943 */ + { 0, 9, 12, 0, -42305, 18432, 76, }, /* 944 */ + { 0, 9, 12, 0, -42258, 18432, 76, }, /* 945 */ + { 0, 9, 12, 0, -42282, 18432, 76, }, /* 946 */ + { 0, 9, 12, 0, -42261, 18432, 76, }, /* 947 */ + { 0, 9, 12, 0, 928, 18432, 76, }, /* 948 */ + { 0, 9, 12, 0, -48, 18432, 76, }, /* 949 */ + { 0, 9, 12, 0, -42307, 18432, 76, }, /* 950 */ + { 0, 9, 12, 0, 
-35384, 18432, 76, }, /* 951 */ + { 0, 9, 12, 0, -42343, 18432, 76, }, /* 952 */ + { 0, 9, 12, 0, -42561, 18432, 76, }, /* 953 */ + { 46, 7, 12, 0, 0, 18432, 84, }, /* 954 */ + { 46, 12, 3, 0, 0, 26624, 106, }, /* 955 */ + { 46, 12, 3, 0, 0, 26624, 158, }, /* 956 */ + { 46, 10, 5, 0, 0, 18432, 154, }, /* 957 */ + { 46, 26, 12, 0, 0, 28672, 74, }, /* 958 */ + { 99, 15, 12, 0, 0, 18816, 74, }, /* 959 */ + { 99, 15, 12, 0, 0, 18820, 74, }, /* 960 */ + { 99, 26, 12, 0, 0, 18824, 74, }, /* 961 */ + { 99, 23, 12, 0, 0, 14732, 74, }, /* 962 */ + { 99, 26, 12, 0, 0, 14728, 74, }, /* 963 */ + { 47, 7, 12, 0, 0, 18432, 84, }, /* 964 */ + { 47, 21, 12, 0, 0, 28672, 74, }, /* 965 */ + { 47, 21, 12, 0, 0, 28672, 128, }, /* 966 */ + { 120, 10, 5, 0, 0, 18432, 154, }, /* 967 */ + { 120, 7, 12, 0, 0, 18432, 84, }, /* 968 */ + { 120, 12, 3, 0, 0, 26624, 158, }, /* 969 */ + { 120, 12, 3, 0, 0, 26624, 106, }, /* 970 */ + { 120, 21, 12, 0, 0, 18432, 128, }, /* 971 */ + { 120, 13, 12, 0, 0, 18432, 144, }, /* 972 */ + { 8, 12, 3, 0, 0, 27024, 98, }, /* 973 */ + { 8, 7, 12, 0, 0, 18836, 84, }, /* 974 */ + { 49, 13, 12, 0, 0, 18432, 144, }, /* 975 */ + { 49, 7, 12, 0, 0, 18432, 84, }, /* 976 */ + { 49, 12, 3, 0, 0, 26624, 106, }, /* 977 */ + { 49, 12, 3, 0, 0, 26624, 98, }, /* 978 */ + { 99, 21, 12, 0, 0, 18840, 200, }, /* 979 */ + { 49, 21, 12, 0, 0, 18432, 128, }, /* 980 */ + { 121, 7, 12, 0, 0, 18432, 84, }, /* 981 */ + { 121, 12, 3, 0, 0, 26624, 106, }, /* 982 */ + { 121, 10, 5, 0, 0, 18432, 154, }, /* 983 */ + { 121, 10, 3, 0, 0, 18432, 188, }, /* 984 */ + { 121, 21, 12, 0, 0, 18432, 74, }, /* 985 */ + { 56, 12, 3, 0, 0, 26624, 106, }, /* 986 */ + { 56, 10, 5, 0, 0, 18432, 154, }, /* 987 */ + { 56, 7, 12, 0, 0, 18432, 84, }, /* 988 */ + { 56, 12, 3, 0, 0, 26624, 98, }, /* 989 */ + { 56, 10, 3, 0, 0, 18432, 188, }, /* 990 */ + { 56, 21, 12, 0, 0, 18432, 74, }, /* 991 */ + { 56, 21, 12, 0, 0, 18432, 110, }, /* 992 */ + { 56, 21, 12, 0, 0, 18432, 128, }, /* 993 */ + { 99, 6, 12, 0, 0, 
18844, 138, }, /* 994 */ + { 56, 13, 12, 0, 0, 18432, 144, }, /* 995 */ + { 20, 6, 12, 0, 0, 18432, 138, }, /* 996 */ + { 122, 7, 12, 0, 0, 18432, 84, }, /* 997 */ + { 122, 12, 3, 0, 0, 26624, 106, }, /* 998 */ + { 122, 10, 5, 0, 0, 18432, 154, }, /* 999 */ + { 122, 13, 12, 0, 0, 18432, 144, }, /* 1000 */ + { 122, 21, 12, 0, 0, 18432, 74, }, /* 1001 */ + { 122, 21, 12, 0, 0, 18432, 128, }, /* 1002 */ + { 124, 7, 12, 0, 0, 18432, 84, }, /* 1003 */ + { 124, 12, 3, 0, 0, 26624, 106, }, /* 1004 */ + { 124, 7, 12, 0, 0, 18432, 170, }, /* 1005 */ + { 124, 12, 3, 0, 0, 26624, 98, }, /* 1006 */ + { 124, 7, 12, 0, 0, 18432, 314, }, /* 1007 */ + { 124, 6, 12, 0, 0, 18432, 138, }, /* 1008 */ + { 124, 21, 12, 0, 0, 18432, 74, }, /* 1009 */ + { 124, 21, 12, 0, 0, 18432, 110, }, /* 1010 */ + { 127, 7, 12, 0, 0, 18432, 84, }, /* 1011 */ + { 127, 10, 5, 0, 0, 18432, 154, }, /* 1012 */ + { 127, 12, 3, 0, 0, 26624, 106, }, /* 1013 */ + { 127, 21, 12, 0, 0, 18432, 128, }, /* 1014 */ + { 127, 6, 12, 0, 0, 18432, 138, }, /* 1015 */ + { 127, 12, 3, 0, 0, 26624, 158, }, /* 1016 */ + { 0, 5, 12, 0, -928, 18432, 78, }, /* 1017 */ + { 24, 5, 12, 0, -38864, 18432, 70, }, /* 1018 */ + { 127, 10, 5, 0, 0, 18432, 174, }, /* 1019 */ + { 127, 13, 12, 0, 0, 18432, 144, }, /* 1020 */ + { 22, 7, 9, 0, 0, 18432, 84, }, /* 1021 */ + { 22, 7, 10, 0, 0, 18432, 84, }, /* 1022 */ + { 98, 4, 12, 0, 0, 18432, 0, }, /* 1023 */ + { 98, 3, 12, 0, 0, 18432, 0, }, /* 1024 */ + { 30, 7, 12, 0, 0, 18432, 304, }, /* 1025 */ + { 0, 5, 12, 0, 1, 18432, 70, }, /* 1026 */ + { 0, 5, 12, 0, -1, 18432, 70, }, /* 1027 */ + { 4, 25, 12, 0, 0, 12288, 122, }, /* 1028 */ + { 5, 7, 12, 0, 0, 0, 316, }, /* 1029 */ + { 99, 18, 12, 0, 0, 29088, 54, }, /* 1030 */ + { 99, 22, 12, 0, 0, 29088, 54, }, /* 1031 */ + { 98, 2, 12, 0, 0, 6144, 318, }, /* 1032 */ + { 5, 7, 12, 0, 0, 420, 84, }, /* 1033 */ + { 5, 26, 12, 0, 0, 29092, 74, }, /* 1034 */ + { 106, 12, 3, 0, 0, 26624, 192, }, /* 1035 */ + { 106, 12, 3, 0, 0, 26624, 320, }, /* 
1036 */ + { 99, 21, 12, 0, 0, 28672, 74, }, /* 1037 */ + { 99, 21, 12, 0, 0, 28672, 128, }, /* 1038 */ + { 99, 21, 12, 0, 0, 28672, 126, }, /* 1039 */ + { 99, 22, 12, 0, 0, 28672, 74, }, /* 1040 */ + { 99, 18, 12, 0, 0, 28672, 74, }, /* 1041 */ + { 99, 17, 12, 0, 0, 28672, 130, }, /* 1042 */ + { 99, 22, 12, 0, 0, 28672, 322, }, /* 1043 */ + { 99, 18, 12, 0, 0, 28672, 322, }, /* 1044 */ + { 99, 21, 12, 0, 0, 8192, 110, }, /* 1045 */ + { 99, 21, 12, 0, 0, 8192, 324, }, /* 1046 */ + { 99, 21, 12, 0, 0, 8192, 326, }, /* 1047 */ + { 99, 22, 12, 0, 0, 28672, 172, }, /* 1048 */ + { 99, 18, 12, 0, 0, 28672, 172, }, /* 1049 */ + { 99, 21, 12, 0, 0, 14336, 74, }, /* 1050 */ + { 99, 21, 12, 0, 0, 28672, 122, }, /* 1051 */ + { 99, 25, 12, 0, 0, 12288, 122, }, /* 1052 */ + { 99, 17, 12, 0, 0, 12288, 328, }, /* 1053 */ + { 99, 25, 12, 0, 0, 28672, 330, }, /* 1054 */ + { 99, 21, 12, 0, 0, 28672, 322, }, /* 1055 */ + { 99, 21, 12, 0, 0, 28672, 332, }, /* 1056 */ + { 99, 17, 12, 0, 0, 12288, 130, }, /* 1057 */ + { 99, 21, 12, 0, 0, 8192, 74, }, /* 1058 */ + { 99, 13, 12, 0, 0, 10240, 334, }, /* 1059 */ + { 0, 9, 12, 0, 32, 18432, 336, }, /* 1060 */ + { 99, 24, 12, 0, 0, 28672, 338, }, /* 1061 */ + { 0, 5, 12, 0, -32, 18432, 340, }, /* 1062 */ + { 99, 21, 12, 0, 0, 29036, 128, }, /* 1063 */ + { 99, 22, 12, 0, 0, 29036, 342, }, /* 1064 */ + { 99, 18, 12, 0, 0, 29036, 342, }, /* 1065 */ + { 99, 21, 12, 0, 0, 29036, 110, }, /* 1066 */ + { 99, 6, 3, 0, 0, 18804, 344, }, /* 1067 */ + { 99, 1, 2, 0, 0, 28672, 346, }, /* 1068 */ + { 39, 7, 12, 0, 0, 18432, 84, }, /* 1069 */ + { 99, 21, 12, 0, 0, 18856, 74, }, /* 1070 */ + { 99, 21, 12, 0, 0, 29096, 74, }, /* 1071 */ + { 99, 21, 12, 0, 0, 18860, 74, }, /* 1072 */ + { 99, 15, 12, 0, 0, 18864, 74, }, /* 1073 */ + { 99, 26, 12, 0, 0, 18860, 74, }, /* 1074 */ + { 1, 14, 12, 0, 0, 28672, 84, }, /* 1075 */ + { 1, 15, 12, 0, 0, 28672, 74, }, /* 1076 */ + { 1, 26, 12, 0, 0, 28672, 74, }, /* 1077 */ + { 1, 26, 12, 0, 0, 18432, 74, }, /* 1078 */ + { 
50, 7, 12, 0, 0, 18432, 84, }, /* 1079 */ + { 51, 7, 12, 0, 0, 18432, 84, }, /* 1080 */ + { 106, 12, 3, 0, 0, 27060, 98, }, /* 1081 */ + { 99, 15, 12, 0, 0, 10676, 74, }, /* 1082 */ + { 104, 7, 12, 0, 0, 18432, 84, }, /* 1083 */ + { 104, 15, 12, 0, 0, 18432, 74, }, /* 1084 */ + { 32, 7, 12, 0, 0, 18432, 84, }, /* 1085 */ + { 32, 14, 12, 0, 0, 18432, 84, }, /* 1086 */ + { 73, 7, 12, 0, 0, 18432, 84, }, /* 1087 */ + { 73, 12, 3, 0, 0, 26624, 106, }, /* 1088 */ + { 107, 7, 12, 0, 0, 18432, 84, }, /* 1089 */ + { 107, 21, 12, 0, 0, 18432, 110, }, /* 1090 */ + { 111, 7, 12, 0, 0, 18432, 84, }, /* 1091 */ + { 111, 21, 12, 0, 0, 18432, 110, }, /* 1092 */ + { 111, 14, 12, 0, 0, 18432, 84, }, /* 1093 */ + { 105, 9, 12, 0, 40, 18432, 76, }, /* 1094 */ + { 105, 5, 12, 0, -40, 18432, 78, }, /* 1095 */ + { 40, 7, 12, 0, 0, 18432, 84, }, /* 1096 */ + { 108, 7, 12, 0, 0, 18432, 84, }, /* 1097 */ + { 108, 13, 12, 0, 0, 18432, 144, }, /* 1098 */ + { 80, 9, 12, 0, 40, 18432, 76, }, /* 1099 */ + { 80, 5, 12, 0, -40, 18432, 78, }, /* 1100 */ + { 66, 7, 12, 0, 0, 18432, 84, }, /* 1101 */ + { 64, 7, 12, 0, 0, 18432, 84, }, /* 1102 */ + { 64, 21, 12, 0, 0, 18432, 74, }, /* 1103 */ + { 167, 9, 12, 0, 39, 18432, 76, }, /* 1104 */ + { 167, 5, 12, 0, -39, 18432, 78, }, /* 1105 */ + { 96, 7, 12, 0, 0, 18432, 84, }, /* 1106 */ + { 69, 7, 12, 0, 0, 18432, 84, }, /* 1107 */ + { 0, 6, 12, 0, 0, 18432, 96, }, /* 1108 */ + { 41, 7, 12, 0, 0, 34816, 84, }, /* 1109 */ + { 128, 7, 12, 0, 0, 34816, 84, }, /* 1110 */ + { 128, 21, 12, 0, 0, 34816, 110, }, /* 1111 */ + { 128, 15, 12, 0, 0, 34816, 74, }, /* 1112 */ + { 143, 7, 12, 0, 0, 34816, 84, }, /* 1113 */ + { 143, 26, 12, 0, 0, 34816, 74, }, /* 1114 */ + { 143, 15, 12, 0, 0, 34816, 74, }, /* 1115 */ + { 142, 7, 12, 0, 0, 34816, 84, }, /* 1116 */ + { 142, 15, 12, 0, 0, 34816, 74, }, /* 1117 */ + { 149, 7, 12, 0, 0, 34816, 84, }, /* 1118 */ + { 149, 15, 12, 0, 0, 34816, 74, }, /* 1119 */ + { 115, 7, 12, 0, 0, 34816, 84, }, /* 1120 */ + { 115, 15, 12, 0, 
0, 34816, 74, }, /* 1121 */ + { 115, 21, 12, 0, 0, 28672, 110, }, /* 1122 */ + { 52, 7, 12, 0, 0, 34816, 84, }, /* 1123 */ + { 52, 21, 12, 0, 0, 34816, 74, }, /* 1124 */ + { 61, 7, 12, 0, 0, 34816, 84, }, /* 1125 */ + { 134, 7, 12, 0, 0, 34816, 84, }, /* 1126 */ + { 134, 15, 12, 0, 0, 34816, 74, }, /* 1127 */ + { 112, 7, 12, 0, 0, 34816, 84, }, /* 1128 */ + { 112, 12, 3, 0, 0, 26624, 106, }, /* 1129 */ + { 112, 12, 3, 0, 0, 26624, 98, }, /* 1130 */ + { 112, 12, 3, 0, 0, 26624, 158, }, /* 1131 */ + { 112, 15, 12, 0, 0, 34816, 74, }, /* 1132 */ + { 112, 21, 12, 0, 0, 34816, 74, }, /* 1133 */ + { 112, 21, 12, 0, 0, 34816, 128, }, /* 1134 */ + { 129, 7, 12, 0, 0, 34816, 84, }, /* 1135 */ + { 129, 15, 12, 0, 0, 34816, 74, }, /* 1136 */ + { 129, 21, 12, 0, 0, 34816, 74, }, /* 1137 */ + { 141, 7, 12, 0, 0, 34816, 84, }, /* 1138 */ + { 141, 15, 12, 0, 0, 34816, 74, }, /* 1139 */ + { 71, 7, 12, 0, 0, 34816, 84, }, /* 1140 */ + { 71, 26, 12, 0, 0, 34816, 74, }, /* 1141 */ + { 71, 12, 3, 0, 0, 26624, 98, }, /* 1142 */ + { 71, 15, 12, 0, 0, 34816, 74, }, /* 1143 */ + { 71, 21, 12, 0, 0, 34816, 110, }, /* 1144 */ + { 71, 21, 12, 0, 0, 35256, 110, }, /* 1145 */ + { 71, 21, 12, 0, 0, 34816, 74, }, /* 1146 */ + { 53, 7, 12, 0, 0, 34816, 84, }, /* 1147 */ + { 53, 21, 12, 0, 0, 28672, 74, }, /* 1148 */ + { 53, 21, 12, 0, 0, 28672, 110, }, /* 1149 */ + { 130, 7, 12, 0, 0, 34816, 84, }, /* 1150 */ + { 130, 15, 12, 0, 0, 34816, 74, }, /* 1151 */ + { 131, 7, 12, 0, 0, 34816, 84, }, /* 1152 */ + { 131, 15, 12, 0, 0, 34816, 74, }, /* 1153 */ + { 74, 7, 12, 0, 0, 34816, 84, }, /* 1154 */ + { 74, 21, 12, 0, 0, 34816, 110, }, /* 1155 */ + { 74, 15, 12, 0, 0, 34816, 74, }, /* 1156 */ + { 57, 7, 12, 0, 0, 34816, 84, }, /* 1157 */ + { 78, 9, 12, 0, 64, 34816, 76, }, /* 1158 */ + { 78, 5, 12, 0, -64, 34816, 78, }, /* 1159 */ + { 78, 15, 12, 0, 0, 34816, 74, }, /* 1160 */ + { 85, 7, 12, 0, 0, 0, 84, }, /* 1161 */ + { 85, 7, 12, 0, 0, 0, 314, }, /* 1162 */ + { 85, 12, 3, 0, 0, 26624, 132, }, /* 
1163 */ + { 85, 13, 12, 0, 0, 2048, 144, }, /* 1164 */ + { 92, 13, 12, 0, 0, 2048, 144, }, /* 1165 */ + { 92, 7, 12, 0, 0, 34816, 84, }, /* 1166 */ + { 92, 6, 12, 0, 0, 34816, 96, }, /* 1167 */ + { 92, 9, 12, 0, 32, 34816, 76, }, /* 1168 */ + { 92, 12, 3, 0, 0, 26624, 132, }, /* 1169 */ + { 92, 12, 3, 0, 0, 26624, 164, }, /* 1170 */ + { 92, 12, 3, 0, 0, 26624, 98, }, /* 1171 */ + { 92, 17, 12, 0, 0, 28672, 130, }, /* 1172 */ + { 92, 6, 12, 0, 0, 34816, 138, }, /* 1173 */ + { 92, 5, 12, 0, -32, 34816, 78, }, /* 1174 */ + { 92, 25, 12, 0, 0, 34816, 122, }, /* 1175 */ + { 5, 15, 12, 0, 0, 2048, 74, }, /* 1176 */ + { 88, 7, 12, 0, 0, 34816, 84, }, /* 1177 */ + { 88, 12, 3, 0, 0, 26624, 106, }, /* 1178 */ + { 88, 17, 12, 0, 0, 34816, 130, }, /* 1179 */ + { 159, 7, 12, 0, 0, 34816, 84, }, /* 1180 */ + { 159, 15, 12, 0, 0, 34816, 74, }, /* 1181 */ + { 86, 7, 12, 0, 0, 0, 84, }, /* 1182 */ + { 86, 12, 3, 0, 0, 26624, 98, }, /* 1183 */ + { 86, 15, 12, 0, 0, 0, 74, }, /* 1184 */ + { 86, 21, 12, 0, 0, 0, 128, }, /* 1185 */ + { 90, 7, 12, 0, 0, 34816, 84, }, /* 1186 */ + { 90, 12, 3, 0, 0, 26624, 98, }, /* 1187 */ + { 90, 21, 12, 0, 0, 34816, 128, }, /* 1188 */ + { 163, 7, 12, 0, 0, 34816, 84, }, /* 1189 */ + { 163, 15, 12, 0, 0, 34816, 74, }, /* 1190 */ + { 160, 7, 12, 0, 0, 34816, 84, }, /* 1191 */ + { 133, 10, 5, 0, 0, 18432, 154, }, /* 1192 */ + { 133, 12, 3, 0, 0, 26624, 106, }, /* 1193 */ + { 133, 7, 12, 0, 0, 18432, 84, }, /* 1194 */ + { 133, 12, 3, 0, 0, 26624, 158, }, /* 1195 */ + { 133, 21, 12, 0, 0, 18432, 128, }, /* 1196 */ + { 133, 21, 12, 0, 0, 18432, 110, }, /* 1197 */ + { 133, 15, 12, 0, 0, 28672, 74, }, /* 1198 */ + { 133, 13, 12, 0, 0, 18432, 144, }, /* 1199 */ + { 133, 12, 3, 0, 0, 26624, 290, }, /* 1200 */ + { 58, 12, 3, 0, 0, 26624, 106, }, /* 1201 */ + { 58, 10, 5, 0, 0, 18432, 154, }, /* 1202 */ + { 58, 7, 12, 0, 0, 18432, 84, }, /* 1203 */ + { 58, 12, 3, 0, 0, 26624, 158, }, /* 1204 */ + { 58, 12, 3, 0, 0, 26624, 98, }, /* 1205 */ + { 58, 21, 12, 0, 0, 
18432, 74, }, /* 1206 */ + { 58, 1, 4, 0, 0, 18432, 134, }, /* 1207 */ + { 58, 21, 12, 0, 0, 18432, 128, }, /* 1208 */ + { 136, 7, 12, 0, 0, 18432, 84, }, /* 1209 */ + { 136, 13, 12, 0, 0, 18432, 144, }, /* 1210 */ + { 60, 12, 3, 0, 0, 26624, 106, }, /* 1211 */ + { 60, 7, 12, 0, 0, 18432, 84, }, /* 1212 */ + { 60, 10, 5, 0, 0, 18432, 154, }, /* 1213 */ + { 60, 12, 3, 0, 0, 26624, 158, }, /* 1214 */ + { 60, 13, 12, 0, 0, 18432, 144, }, /* 1215 */ + { 60, 21, 12, 0, 0, 18432, 74, }, /* 1216 */ + { 60, 21, 12, 0, 0, 18432, 128, }, /* 1217 */ + { 70, 7, 12, 0, 0, 18432, 84, }, /* 1218 */ + { 70, 12, 3, 0, 0, 26624, 98, }, /* 1219 */ + { 70, 21, 12, 0, 0, 18432, 74, }, /* 1220 */ + { 62, 12, 3, 0, 0, 26624, 106, }, /* 1221 */ + { 62, 10, 5, 0, 0, 18432, 154, }, /* 1222 */ + { 62, 7, 12, 0, 0, 18432, 84, }, /* 1223 */ + { 62, 10, 3, 0, 0, 18432, 188, }, /* 1224 */ + { 62, 7, 4, 0, 0, 18432, 84, }, /* 1225 */ + { 62, 21, 12, 0, 0, 18432, 128, }, /* 1226 */ + { 62, 21, 12, 0, 0, 18432, 74, }, /* 1227 */ + { 62, 12, 3, 0, 0, 26624, 104, }, /* 1228 */ + { 62, 12, 3, 0, 0, 26624, 98, }, /* 1229 */ + { 62, 13, 12, 0, 0, 18432, 144, }, /* 1230 */ + { 17, 15, 12, 0, 0, 18432, 74, }, /* 1231 */ + { 68, 7, 12, 0, 0, 18432, 84, }, /* 1232 */ + { 68, 10, 5, 0, 0, 18432, 154, }, /* 1233 */ + { 68, 12, 3, 0, 0, 26624, 106, }, /* 1234 */ + { 68, 10, 3, 0, 0, 18432, 188, }, /* 1235 */ + { 68, 12, 3, 0, 0, 26624, 98, }, /* 1236 */ + { 68, 12, 3, 0, 0, 26624, 162, }, /* 1237 */ + { 68, 21, 12, 0, 0, 18432, 128, }, /* 1238 */ + { 68, 21, 12, 0, 0, 18432, 110, }, /* 1239 */ + { 68, 21, 12, 0, 0, 18432, 74, }, /* 1240 */ + { 77, 7, 12, 0, 0, 18432, 84, }, /* 1241 */ + { 77, 21, 12, 0, 0, 18432, 128, }, /* 1242 */ + { 75, 7, 12, 0, 0, 18432, 84, }, /* 1243 */ + { 75, 12, 3, 0, 0, 26624, 106, }, /* 1244 */ + { 75, 10, 5, 0, 0, 18432, 154, }, /* 1245 */ + { 75, 12, 3, 0, 0, 26624, 98, }, /* 1246 */ + { 75, 12, 3, 0, 0, 26624, 158, }, /* 1247 */ + { 75, 13, 12, 0, 0, 18432, 144, }, /* 1248 */ + 
{ 67, 12, 3, 0, 0, 26624, 106, }, /* 1249 */ + { 67, 12, 3, 0, 0, 26836, 106, }, /* 1250 */ + { 67, 10, 5, 0, 0, 18432, 154, }, /* 1251 */ + { 67, 10, 5, 0, 0, 18644, 154, }, /* 1252 */ + { 67, 7, 12, 0, 0, 18432, 84, }, /* 1253 */ + { 106, 12, 3, 0, 0, 26836, 98, }, /* 1254 */ + { 67, 12, 3, 0, 0, 26836, 98, }, /* 1255 */ + { 67, 10, 3, 0, 0, 18432, 160, }, /* 1256 */ + { 67, 10, 3, 0, 0, 18432, 188, }, /* 1257 */ + { 67, 7, 12, 0, 0, 18432, 348, }, /* 1258 */ + { 67, 12, 3, 0, 0, 26624, 98, }, /* 1259 */ + { 97, 7, 12, 0, 0, 18432, 84, }, /* 1260 */ + { 97, 10, 3, 0, 0, 18432, 160, }, /* 1261 */ + { 97, 10, 5, 0, 0, 18432, 154, }, /* 1262 */ + { 97, 12, 3, 0, 0, 26624, 106, }, /* 1263 */ + { 97, 12, 3, 0, 0, 26624, 158, }, /* 1264 */ + { 97, 10, 3, 0, 0, 18432, 188, }, /* 1265 */ + { 97, 7, 4, 0, 0, 18432, 84, }, /* 1266 */ + { 97, 12, 3, 0, 0, 26624, 164, }, /* 1267 */ + { 97, 7, 12, 0, 0, 18432, 350, }, /* 1268 */ + { 97, 21, 12, 0, 0, 18432, 128, }, /* 1269 */ + { 97, 21, 12, 0, 0, 18432, 74, }, /* 1270 */ + { 97, 12, 3, 0, 0, 26624, 98, }, /* 1271 */ + { 153, 7, 12, 0, 0, 18432, 84, }, /* 1272 */ + { 153, 10, 5, 0, 0, 18432, 154, }, /* 1273 */ + { 153, 12, 3, 0, 0, 26624, 106, }, /* 1274 */ + { 153, 12, 3, 0, 0, 26624, 158, }, /* 1275 */ + { 153, 12, 3, 0, 0, 26624, 98, }, /* 1276 */ + { 153, 21, 12, 0, 0, 18432, 128, }, /* 1277 */ + { 153, 21, 12, 0, 0, 18432, 110, }, /* 1278 */ + { 153, 21, 12, 0, 0, 18432, 74, }, /* 1279 */ + { 153, 13, 12, 0, 0, 18432, 144, }, /* 1280 */ + { 153, 12, 3, 0, 0, 26624, 104, }, /* 1281 */ + { 76, 7, 12, 0, 0, 18432, 84, }, /* 1282 */ + { 76, 10, 3, 0, 0, 18432, 160, }, /* 1283 */ + { 76, 10, 5, 0, 0, 18432, 154, }, /* 1284 */ + { 76, 12, 3, 0, 0, 26624, 106, }, /* 1285 */ + { 76, 12, 3, 0, 0, 26624, 158, }, /* 1286 */ + { 76, 12, 3, 0, 0, 26624, 98, }, /* 1287 */ + { 76, 21, 12, 0, 0, 18432, 74, }, /* 1288 */ + { 76, 13, 12, 0, 0, 18432, 144, }, /* 1289 */ + { 145, 7, 12, 0, 0, 18432, 84, }, /* 1290 */ + { 145, 10, 3, 0, 0, 
18432, 160, }, /* 1291 */ + { 145, 10, 5, 0, 0, 18432, 154, }, /* 1292 */ + { 145, 12, 3, 0, 0, 26624, 106, }, /* 1293 */ + { 145, 12, 3, 0, 0, 26624, 158, }, /* 1294 */ + { 145, 12, 3, 0, 0, 26624, 98, }, /* 1295 */ + { 145, 21, 12, 0, 0, 18432, 74, }, /* 1296 */ + { 145, 21, 12, 0, 0, 18432, 128, }, /* 1297 */ + { 145, 21, 12, 0, 0, 18432, 110, }, /* 1298 */ + { 145, 21, 12, 0, 0, 18432, 190, }, /* 1299 */ + { 72, 7, 12, 0, 0, 18432, 84, }, /* 1300 */ + { 72, 10, 5, 0, 0, 18432, 154, }, /* 1301 */ + { 72, 12, 3, 0, 0, 26624, 106, }, /* 1302 */ + { 72, 12, 3, 0, 0, 26624, 158, }, /* 1303 */ + { 72, 21, 12, 0, 0, 18432, 128, }, /* 1304 */ + { 72, 21, 12, 0, 0, 18432, 74, }, /* 1305 */ + { 72, 13, 12, 0, 0, 18432, 144, }, /* 1306 */ + { 63, 7, 12, 0, 0, 18432, 84, }, /* 1307 */ + { 63, 12, 3, 0, 0, 26624, 106, }, /* 1308 */ + { 63, 10, 5, 0, 0, 18432, 154, }, /* 1309 */ + { 63, 10, 3, 0, 0, 18432, 188, }, /* 1310 */ + { 63, 12, 3, 0, 0, 26624, 98, }, /* 1311 */ + { 63, 21, 12, 0, 0, 18432, 74, }, /* 1312 */ + { 63, 13, 12, 0, 0, 18432, 144, }, /* 1313 */ + { 147, 7, 12, 0, 0, 18432, 84, }, /* 1314 */ + { 147, 12, 3, 0, 0, 26624, 106, }, /* 1315 */ + { 147, 10, 5, 0, 0, 18432, 154, }, /* 1316 */ + { 147, 10, 12, 0, 0, 18432, 154, }, /* 1317 */ + { 147, 12, 3, 0, 0, 26624, 158, }, /* 1318 */ + { 147, 13, 12, 0, 0, 18432, 144, }, /* 1319 */ + { 147, 15, 12, 0, 0, 18432, 74, }, /* 1320 */ + { 147, 21, 12, 0, 0, 18432, 128, }, /* 1321 */ + { 147, 26, 12, 0, 0, 18432, 74, }, /* 1322 */ + { 83, 7, 12, 0, 0, 18432, 84, }, /* 1323 */ + { 83, 10, 5, 0, 0, 18432, 154, }, /* 1324 */ + { 83, 12, 3, 0, 0, 26624, 106, }, /* 1325 */ + { 83, 12, 3, 0, 0, 26624, 158, }, /* 1326 */ + { 83, 12, 3, 0, 0, 26624, 98, }, /* 1327 */ + { 83, 21, 12, 0, 0, 18432, 74, }, /* 1328 */ + { 146, 9, 12, 0, 32, 18432, 76, }, /* 1329 */ + { 146, 5, 12, 0, -32, 18432, 78, }, /* 1330 */ + { 146, 13, 12, 0, 0, 18432, 144, }, /* 1331 */ + { 146, 15, 12, 0, 0, 18432, 74, }, /* 1332 */ + { 146, 7, 12, 0, 0, 
18432, 84, }, /* 1333 */ + { 164, 7, 12, 0, 0, 18432, 84, }, /* 1334 */ + { 164, 10, 3, 0, 0, 18432, 160, }, /* 1335 */ + { 164, 10, 5, 0, 0, 18432, 154, }, /* 1336 */ + { 164, 12, 3, 0, 0, 26624, 106, }, /* 1337 */ + { 164, 10, 3, 0, 0, 18432, 188, }, /* 1338 */ + { 164, 12, 3, 0, 0, 26624, 158, }, /* 1339 */ + { 164, 7, 4, 0, 0, 18432, 84, }, /* 1340 */ + { 164, 12, 3, 0, 0, 26624, 98, }, /* 1341 */ + { 164, 21, 12, 0, 0, 18432, 128, }, /* 1342 */ + { 164, 21, 12, 0, 0, 18432, 74, }, /* 1343 */ + { 164, 13, 12, 0, 0, 18432, 144, }, /* 1344 */ + { 87, 7, 12, 0, 0, 18432, 84, }, /* 1345 */ + { 87, 10, 5, 0, 0, 18432, 154, }, /* 1346 */ + { 87, 12, 3, 0, 0, 26624, 106, }, /* 1347 */ + { 87, 12, 3, 0, 0, 26624, 158, }, /* 1348 */ + { 87, 21, 12, 0, 0, 18432, 74, }, /* 1349 */ + { 156, 7, 12, 0, 0, 18432, 84, }, /* 1350 */ + { 156, 12, 3, 0, 0, 26624, 106, }, /* 1351 */ + { 156, 12, 3, 0, 0, 18432, 106, }, /* 1352 */ + { 156, 12, 3, 0, 0, 26624, 104, }, /* 1353 */ + { 156, 12, 3, 0, 0, 26624, 158, }, /* 1354 */ + { 156, 10, 5, 0, 0, 18432, 154, }, /* 1355 */ + { 156, 7, 4, 0, 0, 18432, 84, }, /* 1356 */ + { 156, 21, 12, 0, 0, 18432, 74, }, /* 1357 */ + { 156, 21, 12, 0, 0, 18432, 128, }, /* 1358 */ + { 155, 7, 12, 0, 0, 18432, 84, }, /* 1359 */ + { 155, 12, 3, 0, 0, 26624, 106, }, /* 1360 */ + { 155, 10, 5, 0, 0, 18432, 154, }, /* 1361 */ + { 155, 7, 4, 0, 0, 18432, 84, }, /* 1362 */ + { 155, 12, 3, 0, 0, 26624, 352, }, /* 1363 */ + { 155, 12, 3, 0, 0, 26624, 158, }, /* 1364 */ + { 155, 21, 12, 0, 0, 18432, 74, }, /* 1365 */ + { 155, 21, 12, 0, 0, 18432, 128, }, /* 1366 */ + { 155, 21, 12, 0, 0, 18432, 110, }, /* 1367 */ + { 144, 7, 12, 0, 0, 18432, 84, }, /* 1368 */ + { 95, 7, 12, 0, 0, 18432, 84, }, /* 1369 */ + { 95, 21, 12, 0, 0, 18432, 74, }, /* 1370 */ + { 95, 13, 12, 0, 0, 18432, 144, }, /* 1371 */ + { 151, 7, 12, 0, 0, 18432, 84, }, /* 1372 */ + { 151, 10, 5, 0, 0, 18432, 154, }, /* 1373 */ + { 151, 12, 3, 0, 0, 26624, 106, }, /* 1374 */ + { 151, 12, 3, 0, 0, 
18432, 158, }, /* 1375 */ + { 151, 21, 12, 0, 0, 18432, 128, }, /* 1376 */ + { 151, 21, 12, 0, 0, 18432, 110, }, /* 1377 */ + { 151, 21, 12, 0, 0, 18432, 74, }, /* 1378 */ + { 151, 13, 12, 0, 0, 18432, 144, }, /* 1379 */ + { 151, 15, 12, 0, 0, 18432, 74, }, /* 1380 */ + { 152, 21, 12, 0, 0, 18432, 74, }, /* 1381 */ + { 152, 21, 12, 0, 0, 18432, 110, }, /* 1382 */ + { 152, 7, 12, 0, 0, 18432, 84, }, /* 1383 */ + { 152, 12, 3, 0, 0, 26624, 106, }, /* 1384 */ + { 152, 10, 5, 0, 0, 18432, 154, }, /* 1385 */ + { 82, 7, 12, 0, 0, 18432, 84, }, /* 1386 */ + { 82, 12, 3, 0, 0, 26624, 106, }, /* 1387 */ + { 82, 12, 3, 0, 0, 26624, 98, }, /* 1388 */ + { 82, 12, 3, 0, 0, 26624, 158, }, /* 1389 */ + { 82, 7, 4, 0, 0, 18432, 84, }, /* 1390 */ + { 82, 13, 12, 0, 0, 18432, 144, }, /* 1391 */ + { 84, 7, 12, 0, 0, 18432, 84, }, /* 1392 */ + { 84, 10, 5, 0, 0, 18432, 154, }, /* 1393 */ + { 84, 12, 3, 0, 0, 26624, 106, }, /* 1394 */ + { 84, 12, 3, 0, 0, 26624, 158, }, /* 1395 */ + { 84, 13, 12, 0, 0, 18432, 144, }, /* 1396 */ + { 157, 7, 12, 0, 0, 18432, 84, }, /* 1397 */ + { 157, 12, 3, 0, 0, 26624, 106, }, /* 1398 */ + { 157, 10, 5, 0, 0, 18432, 154, }, /* 1399 */ + { 157, 21, 12, 0, 0, 18432, 128, }, /* 1400 */ + { 168, 12, 3, 0, 0, 26624, 106, }, /* 1401 */ + { 168, 7, 4, 0, 0, 18432, 84, }, /* 1402 */ + { 168, 10, 5, 0, 0, 18432, 154, }, /* 1403 */ + { 168, 7, 12, 0, 0, 18432, 84, }, /* 1404 */ + { 168, 10, 3, 0, 0, 18432, 188, }, /* 1405 */ + { 168, 12, 3, 0, 0, 26624, 158, }, /* 1406 */ + { 168, 21, 12, 0, 0, 18432, 128, }, /* 1407 */ + { 168, 21, 12, 0, 0, 18432, 74, }, /* 1408 */ + { 168, 13, 12, 0, 0, 18432, 144, }, /* 1409 */ + { 168, 12, 3, 0, 0, 26624, 98, }, /* 1410 */ + { 13, 15, 12, 0, 0, 18432, 74, }, /* 1411 */ + { 13, 21, 12, 0, 0, 18432, 74, }, /* 1412 */ + { 114, 7, 12, 0, 0, 18432, 84, }, /* 1413 */ + { 114, 14, 12, 0, 0, 18432, 84, }, /* 1414 */ + { 114, 21, 12, 0, 0, 18432, 110, }, /* 1415 */ + { 89, 7, 12, 0, 0, 18432, 84, }, /* 1416 */ + { 89, 21, 12, 0, 0, 
18432, 74, }, /* 1417 */ + { 125, 7, 12, 0, 0, 18432, 84, }, /* 1418 */ + { 125, 1, 2, 0, 0, 18432, 346, }, /* 1419 */ + { 125, 12, 3, 0, 0, 26624, 104, }, /* 1420 */ + { 125, 12, 3, 0, 0, 26624, 98, }, /* 1421 */ + { 148, 7, 12, 0, 0, 18432, 84, }, /* 1422 */ + { 93, 7, 12, 0, 0, 18432, 84, }, /* 1423 */ + { 93, 12, 3, 0, 0, 26624, 106, }, /* 1424 */ + { 93, 10, 5, 0, 0, 18432, 154, }, /* 1425 */ + { 93, 12, 3, 0, 0, 26624, 158, }, /* 1426 */ + { 93, 13, 12, 0, 0, 18432, 144, }, /* 1427 */ + { 140, 7, 12, 0, 0, 18432, 84, }, /* 1428 */ + { 140, 13, 12, 0, 0, 18432, 144, }, /* 1429 */ + { 140, 21, 12, 0, 0, 18432, 128, }, /* 1430 */ + { 166, 7, 12, 0, 0, 18432, 84, }, /* 1431 */ + { 166, 13, 12, 0, 0, 18432, 144, }, /* 1432 */ + { 137, 7, 12, 0, 0, 18432, 84, }, /* 1433 */ + { 137, 12, 3, 0, 0, 26624, 98, }, /* 1434 */ + { 137, 21, 12, 0, 0, 18432, 128, }, /* 1435 */ + { 138, 7, 12, 0, 0, 18432, 84, }, /* 1436 */ + { 138, 12, 3, 0, 0, 26624, 98, }, /* 1437 */ + { 138, 21, 12, 0, 0, 18432, 128, }, /* 1438 */ + { 138, 21, 12, 0, 0, 18432, 110, }, /* 1439 */ + { 138, 21, 12, 0, 0, 18432, 74, }, /* 1440 */ + { 138, 26, 12, 0, 0, 18432, 74, }, /* 1441 */ + { 138, 6, 12, 0, 0, 18432, 148, }, /* 1442 */ + { 138, 6, 12, 0, 0, 18432, 138, }, /* 1443 */ + { 138, 13, 12, 0, 0, 18432, 144, }, /* 1444 */ + { 138, 15, 12, 0, 0, 18432, 74, }, /* 1445 */ + { 170, 6, 12, 0, 0, 18432, 148, }, /* 1446 */ + { 170, 7, 12, 0, 0, 18432, 84, }, /* 1447 */ + { 170, 7, 7, 0, 0, 18432, 84, }, /* 1448 */ + { 170, 6, 12, 0, 0, 18432, 94, }, /* 1449 */ + { 170, 21, 12, 0, 0, 18432, 74, }, /* 1450 */ + { 170, 21, 12, 0, 0, 18432, 128, }, /* 1451 */ + { 170, 13, 12, 0, 0, 18432, 144, }, /* 1452 */ + { 158, 9, 12, 0, 32, 18432, 76, }, /* 1453 */ + { 158, 5, 12, 0, -32, 18432, 78, }, /* 1454 */ + { 158, 15, 12, 0, 0, 18432, 74, }, /* 1455 */ + { 158, 21, 12, 0, 0, 18432, 110, }, /* 1456 */ + { 158, 21, 12, 0, 0, 18432, 128, }, /* 1457 */ + { 158, 21, 12, 0, 0, 18432, 74, }, /* 1458 */ + { 135, 7, 
12, 0, 0, 18432, 84, }, /* 1459 */ + { 135, 12, 3, 0, 0, 26624, 106, }, /* 1460 */ + { 135, 10, 5, 0, 0, 18432, 154, }, /* 1461 */ + { 135, 12, 3, 0, 0, 26624, 132, }, /* 1462 */ + { 135, 6, 12, 0, 0, 18432, 94, }, /* 1463 */ + { 81, 6, 12, 0, 0, 18432, 138, }, /* 1464 */ + { 154, 6, 12, 0, 0, 18432, 138, }, /* 1465 */ + { 30, 21, 12, 0, 0, 28672, 74, }, /* 1466 */ + { 165, 12, 3, 0, 0, 26624, 354, }, /* 1467 */ + { 30, 10, 3, 0, 0, 18432, 356, }, /* 1468 */ + { 81, 7, 12, 0, 0, 18432, 304, }, /* 1469 */ + { 165, 7, 12, 0, 0, 18432, 304, }, /* 1470 */ + { 28, 6, 12, 0, 0, 18432, 94, }, /* 1471 */ + { 154, 7, 12, 0, 0, 18432, 304, }, /* 1472 */ + { 65, 7, 12, 0, 0, 18432, 84, }, /* 1473 */ + { 65, 26, 12, 0, 0, 18432, 74, }, /* 1474 */ + { 65, 12, 3, 0, 0, 26624, 104, }, /* 1475 */ + { 65, 12, 3, 0, 0, 26624, 106, }, /* 1476 */ + { 65, 21, 12, 0, 0, 18432, 128, }, /* 1477 */ + { 99, 1, 2, 0, 0, 6472, 66, }, /* 1478 */ + { 99, 13, 12, 0, 0, 10240, 144, }, /* 1479 */ + { 99, 10, 3, 0, 0, 18432, 358, }, /* 1480 */ + { 99, 10, 3, 0, 0, 18432, 306, }, /* 1481 */ + { 1, 12, 3, 0, 0, 26624, 104, }, /* 1482 */ + { 99, 25, 12, 0, 0, 28672, 360, }, /* 1483 */ + { 99, 13, 12, 0, 0, 10240, 226, }, /* 1484 */ + { 150, 26, 12, 0, 0, 18432, 74, }, /* 1485 */ + { 150, 12, 3, 0, 0, 26624, 104, }, /* 1486 */ + { 150, 21, 12, 0, 0, 18432, 110, }, /* 1487 */ + { 150, 21, 12, 0, 0, 18432, 128, }, /* 1488 */ + { 150, 21, 12, 0, 0, 18432, 74, }, /* 1489 */ + { 44, 12, 3, 0, 0, 26624, 106, }, /* 1490 */ + { 2, 6, 12, 0, 0, 18432, 92, }, /* 1491 */ + { 161, 7, 12, 0, 0, 18432, 84, }, /* 1492 */ + { 161, 12, 3, 0, 0, 26624, 98, }, /* 1493 */ + { 161, 6, 12, 0, 0, 18432, 148, }, /* 1494 */ + { 161, 6, 12, 0, 0, 18432, 138, }, /* 1495 */ + { 161, 13, 12, 0, 0, 18432, 144, }, /* 1496 */ + { 161, 26, 12, 0, 0, 18432, 74, }, /* 1497 */ + { 91, 7, 12, 0, 0, 18432, 84, }, /* 1498 */ + { 91, 12, 3, 0, 0, 26624, 98, }, /* 1499 */ + { 162, 7, 12, 0, 0, 18432, 84, }, /* 1500 */ + { 162, 12, 3, 0, 0, 
26624, 98, }, /* 1501 */ + { 162, 13, 12, 0, 0, 18432, 144, }, /* 1502 */ + { 162, 23, 12, 0, 0, 14336, 74, }, /* 1503 */ + { 169, 7, 12, 0, 0, 18432, 84, }, /* 1504 */ + { 169, 6, 12, 0, 0, 18432, 148, }, /* 1505 */ + { 169, 12, 3, 0, 0, 26624, 104, }, /* 1506 */ + { 169, 13, 12, 0, 0, 18432, 144, }, /* 1507 */ + { 94, 7, 12, 0, 0, 18432, 84, }, /* 1508 */ + { 94, 12, 3, 0, 0, 26624, 98, }, /* 1509 */ + { 94, 12, 3, 0, 0, 26624, 164, }, /* 1510 */ + { 94, 13, 12, 0, 0, 18432, 144, }, /* 1511 */ + { 94, 21, 12, 0, 0, 18432, 74, }, /* 1512 */ + { 139, 7, 12, 0, 0, 34816, 84, }, /* 1513 */ + { 139, 15, 12, 0, 0, 34816, 74, }, /* 1514 */ + { 139, 12, 3, 0, 0, 26624, 98, }, /* 1515 */ + { 79, 9, 12, 0, 34, 34816, 76, }, /* 1516 */ + { 79, 5, 12, 0, -34, 34816, 78, }, /* 1517 */ + { 79, 12, 3, 0, 0, 26624, 164, }, /* 1518 */ + { 79, 12, 3, 0, 0, 26624, 106, }, /* 1519 */ + { 79, 12, 3, 0, 0, 26624, 98, }, /* 1520 */ + { 79, 6, 12, 0, 0, 34816, 148, }, /* 1521 */ + { 79, 13, 12, 0, 0, 34816, 144, }, /* 1522 */ + { 79, 21, 12, 0, 0, 34816, 74, }, /* 1523 */ + { 99, 15, 12, 0, 0, 0, 74, }, /* 1524 */ + { 99, 26, 12, 0, 0, 0, 74, }, /* 1525 */ + { 99, 23, 12, 0, 0, 0, 74, }, /* 1526 */ + { 5, 7, 12, 0, 0, 0, 254, }, /* 1527 */ + { 99, 26, 14, 0, 0, 28672, 362, }, /* 1528 */ + { 99, 26, 14, 0, 0, 28672, 364, }, /* 1529 */ + { 98, 2, 14, 0, 0, 18432, 366, }, /* 1530 */ + { 99, 26, 12, 0, 0, 18432, 368, }, /* 1531 */ + { 99, 26, 14, 0, 0, 18432, 370, }, /* 1532 */ + { 99, 26, 14, 0, 0, 18432, 364, }, /* 1533 */ + { 99, 26, 11, 0, 0, 18432, 372, }, /* 1534 */ + { 27, 26, 12, 0, 0, 18432, 74, }, /* 1535 */ + { 99, 26, 14, 0, 0, 18432, 250, }, /* 1536 */ + { 99, 26, 14, 0, 0, 18784, 364, }, /* 1537 */ + { 99, 26, 14, 0, 0, 28672, 374, }, /* 1538 */ + { 99, 26, 14, 0, 0, 28672, 376, }, /* 1539 */ + { 99, 24, 3, 0, 0, 28672, 378, }, /* 1540 */ + { 99, 26, 14, 0, 0, 28672, 380, }, /* 1541 */ + { 99, 1, 3, 0, 0, 6144, 382, }, /* 1542 */ }; const uint16_t PRIV(ucd_stage1)[] = { /* 
17408 bytes */ @@ -1885,36 +2129,36 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 128,128,129,129,130,131,132,133,134,135,136,137,138,139,140,141, /* U+F800 */ 142,143,144,145,146,147,148,149,150,151,152,153,154,154,155,156, /* U+10000 */ 157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172, /* U+10800 */ -173,174,175,176,177,178,179,146,180,181,146,182,183,184,185,146, /* U+11000 */ -186,187,188,189,190,191,192,146,193,194,195,196,146,197,198,199, /* U+11800 */ -200,200,200,200,200,200,200,201,202,200,203,146,146,146,146,146, /* U+12000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,204, /* U+12800 */ -205,205,205,205,205,205,205,205,206,146,146,146,146,146,146,146, /* U+13000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+13800 */ -146,146,146,146,146,146,146,146,207,207,207,207,208,146,146,146, /* U+14000 */ +173,174,175,176,177,178,179,180,181,182,146,183,184,185,186,146, /* U+11000 */ +187,188,189,190,191,192,193,194,195,196,197,198,146,199,200,201, /* U+11800 */ +202,202,202,202,202,202,202,203,204,202,205,146,146,146,146,146, /* U+12000 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,206, /* U+12800 */ +207,207,207,207,207,207,207,207,208,207,207,207,207,207,207,207, /* U+13000 */ +207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207, /* U+13800 */ +207,207,207,207,207,207,207,209,210,210,210,210,211,146,146,146, /* U+14000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+14800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+15000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+15800 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+16000 */ -209,209,209,209,210,211,212,213,146,146,146,146,214,215,216,217, /* U+16800 */ -218,218,218,218,218,218,218,218,218,218,218,218,218,218,218,218, /* U+17000 */ -218,218,218,218,218,218,218,218,218,218,218,218,218,218,218,218, /* 
U+17800 */ -218,218,218,218,218,218,218,218,218,218,218,218,218,218,218,219, /* U+18000 */ -218,218,218,218,218,218,220,220,220,221,222,146,146,146,146,146, /* U+18800 */ +146,146,212,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+16000 */ +213,213,213,213,214,215,216,217,146,146,218,146,219,220,221,222, /* U+16800 */ +223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223, /* U+17000 */ +223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223, /* U+17800 */ +223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,224, /* U+18000 */ +223,223,223,223,223,223,225,225,225,226,227,146,146,146,146,146, /* U+18800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+19000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+19800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+1A000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,223, /* U+1A800 */ -224,225,226,227,227,228,146,146,146,146,146,146,146,146,146,146, /* U+1B000 */ -146,146,146,146,146,146,146,146,229,230,146,146,146,146,146,146, /* U+1B800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,228, /* U+1A800 */ +229,230,231,232,232,233,146,146,146,146,146,146,146,146,146,146, /* U+1B000 */ +146,146,146,146,146,146,146,146,234,235,146,146,146,146,146,146, /* U+1B800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+1C000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,231,232, /* U+1C800 */ -233,234,235,236,237,238,239,146,240,241,242,243,244,245,246,247, /* U+1D000 */ -248,248,248,248,249,250,146,146,146,146,146,146,146,146,251,146, /* U+1D800 */ -252,253,254,146,146,255,146,146,146,256,146,146,146,146,146,257, /* U+1E000 */ -258,259,260,168,168,168,168,168,261,262,263,168,264,265,168,168, /* U+1E800 */ -266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281, /* U+1F000 */ -282,283,284,285,286,287,288,289,271,271,271,271,271,271,271,290, /* 
U+1F800 */ +146,146,146,146,146,146,146,146,236,237,236,236,236,238,239,240, /* U+1C800 */ +241,242,243,244,245,246,247,146,248,249,250,251,252,253,254,255, /* U+1D000 */ +256,256,256,256,257,258,146,146,146,146,146,146,146,146,259,146, /* U+1D800 */ +260,261,262,146,146,263,146,146,146,264,146,265,146,146,146,266, /* U+1E000 */ +267,268,269,270,270,270,270,270,271,272,273,270,274,275,270,270, /* U+1E800 */ +276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291, /* U+1F000 */ +292,293,294,295,296,297,236,298,281,281,281,281,281,281,281,299, /* U+1F800 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+20000 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+20800 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+21000 */ @@ -1935,23 +2179,23 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+28800 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+29000 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+29800 */ -101,101,101,101,101,101,101,101,101,101,101,101,101,291,101,101, /* U+2A000 */ +101,101,101,101,101,101,101,101,101,101,101,101,101,300,101,101, /* U+2A000 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+2A800 */ -101,101,101,101,101,101,101,101,101,101,101,101,101,101,292,101, /* U+2B000 */ -293,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+2B800 */ +101,101,101,101,101,101,101,101,101,101,101,101,101,101,301,101, /* U+2B000 */ +302,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+2B800 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+2C000 */ -101,101,101,101,101,101,101,101,101,101,101,101,101,294,101,101, /* U+2C800 */ +101,101,101,101,101,101,101,101,101,101,101,101,101,303,101,101, /* U+2C800 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+2D000 
*/ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+2D800 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+2E000 */ -101,101,101,101,101,101,101,295,146,146,146,146,146,146,146,146, /* U+2E800 */ +101,101,101,101,101,101,101,304,101,101,101,101,305,146,146,146, /* U+2E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+2F000 */ -129,129,129,129,296,146,146,146,146,146,146,146,146,146,146,297, /* U+2F800 */ +129,129,129,129,306,146,146,146,146,146,146,146,146,146,146,307, /* U+2F800 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+30000 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+30800 */ -101,101,101,101,101,101,298,101,101,101,101,101,101,101,101,101, /* U+31000 */ +101,101,101,101,101,101,308,101,101,101,101,101,101,101,101,101, /* U+31000 */ 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, /* U+31800 */ -101,101,101,101,101,101,101,299,146,146,146,146,146,146,146,146, /* U+32000 */ +101,101,101,101,101,101,101,309,146,146,146,146,146,146,146,146, /* U+32000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+32800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+33000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+33800 */ @@ -1978,7 +2222,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+3E000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+3E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+3F000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+3F800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+3F800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+40000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+40800 */ 
146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+41000 */ @@ -2010,7 +2254,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+4E000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+4E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+4F000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+4F800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+4F800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+50000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+50800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+51000 */ @@ -2042,7 +2286,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+5E000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+5E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+5F000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+5F800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+5F800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+60000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+60800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+61000 */ @@ -2074,7 +2318,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+6E000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+6E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+6F000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+6F800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+6F800 */ 
146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+70000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+70800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+71000 */ @@ -2106,7 +2350,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+7E000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+7E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+7F000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+7F800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+7F800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+80000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+80800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+81000 */ @@ -2138,7 +2382,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+8E000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+8E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+8F000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+8F800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+8F800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+90000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+90800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+91000 */ @@ -2170,7 +2414,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+9E000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+9E800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+9F000 */ 
-146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+9F800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+9F800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+A0000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+A0800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+A1000 */ @@ -2202,7 +2446,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+AE000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+AE800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+AF000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+AF800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+AF800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+B0000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+B0800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+B1000 */ @@ -2234,7 +2478,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+BE000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+BE800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+BF000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+BF800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+BF800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+C0000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+C0800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+C1000 */ @@ -2266,7 +2510,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+CE000 */ 
146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+CE800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+CF000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+CF800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+CF800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+D0000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+D0800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+D1000 */ @@ -2298,9 +2542,9 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+DE000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+DE800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+DF000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+DF800 */ -300,301,302,303,301,301,301,301,301,301,301,301,301,301,301,301, /* U+E0000 */ -301,301,301,301,301,301,301,301,301,301,301,301,301,301,301,301, /* U+E0800 */ +146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+DF800 */ +310,311,312,313,311,311,311,311,311,311,311,311,311,311,311,311, /* U+E0000 */ +311,311,311,311,311,311,311,311,311,311,311,311,311,311,311,311, /* U+E0800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+E1000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+E1800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+E2000 */ @@ -2330,7 +2574,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+EE000 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+EE800 */ 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, /* U+EF000 */ -146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,297, /* U+EF800 */ 
+146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,307, /* U+EF800 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+F0000 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+F0800 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+F1000 */ @@ -2362,7 +2606,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+FE000 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+FE800 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+FF000 */ -128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,304, /* U+FF800 */ +128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,314, /* U+FF800 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+100000 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+100800 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+101000 */ @@ -2394,10 +2638,10 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+10E000 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+10E800 */ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, /* U+10F000 */ -128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,304, /* U+10F800 */ +128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,314, /* U+10F800 */ }; -const uint16_t PRIV(ucd_stage2)[] = { /* 78080 bytes, block = 128 */ +const uint16_t PRIV(ucd_stage2)[] = { /* 80640 bytes, block = 128 */ /* block 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 3, 4, 0, 0, @@ -2431,553 +2675,553 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 78080 bytes, block = 128 */ /* block 3 */ 74, 75, 65, 66, 65, 66, 76, 65, 66, 77, 77, 65, 66, 70, 78, 79, - 80, 65, 66, 77, 81, 82, 83, 84, 65, 66, 85, 70, 83, 86, 87, 88, - 65, 66, 65, 66, 65, 66, 89, 65, 66, 89, 
70, 70, 65, 66, 89, 65, - 66, 90, 90, 65, 66, 65, 66, 91, 65, 66, 70, 92, 65, 66, 70, 93, - 92, 92, 92, 92, 94, 95, 96, 97, 98, 99,100,101,102, 65, 66, 65, - 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66,103, 65, 66, + 80, 65, 66, 77, 81, 82, 83, 84, 65, 66, 85, 86, 83, 87, 88, 89, + 65, 66, 65, 66, 65, 66, 90, 65, 66, 90, 70, 70, 65, 66, 90, 65, + 66, 91, 91, 65, 66, 65, 66, 92, 65, 66, 70, 93, 65, 66, 70, 94, + 93, 93, 93, 93, 95, 96, 97, 98, 99,100,101,102,103, 65, 66, 65, + 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66,104, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, - 69,104,105,106, 65, 66,107,108, 65, 66, 65, 66, 65, 66, 65, 66, + 69,105,106,107, 65, 66,108,109, 65, 66, 65, 66, 65, 66, 65, 66, /* block 4 */ 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, -109, 70, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, - 65, 66, 65, 66, 70, 70, 70, 70, 70, 70,110, 65, 66,111,112,113, -113, 65, 66,114,115,116, 65, 66, 65, 67, 65, 66, 65, 66, 65, 66, -117,118,119,120,121, 70,122,122, 70,123, 70,124,125, 70, 70, 70, -122,126, 70,127, 70,128,129, 70,130,131,129,132,133, 70, 70,131, - 70,134,135, 70, 70,136, 70, 70, 70, 70, 70, 70, 70,137, 70, 70, +110, 70, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, + 65, 66, 65, 66, 70, 70, 70, 70, 70, 70,111, 65, 66,112,113,114, +114, 65, 66,115,116,117, 65, 66, 65, 67, 65, 66, 65, 66, 65, 66, +118,119,120,121,122, 70,123,123, 70,124, 70,125,126, 70, 70, 70, +123,127, 70,128,129,130,131, 70,132,133,131,134,135, 70, 70,133, + 70,136,137, 70, 70,138, 70, 70, 70, 70, 70, 70, 70,139, 70, 70, /* block 5 */ -138, 70,139,138, 70, 70, 70,140,138,141,142,142,143, 70, 70, 70, - 70, 70,144, 70, 92, 70, 70, 70, 70, 70, 70, 70, 70,145,146, 70, +140, 70,141,140, 70, 70, 70,142,140,143,144,144,145, 70, 70, 70, + 70, 70,146, 70, 93, 70, 70, 70, 70, 70, 70, 70, 70,147,148, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 
70, 70, 70, 70, 70, 70, -147,147,148,147,147,147,147,147,147,149,149,150,150,150,150,150, -151,151, 46, 46, 46, 46,149,149,149,149,149,149,149,149,149,149, -152,152, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, -147,147,147,147,147, 46, 46, 46, 46, 46,153,153,149, 46,150, 46, +149,149,150,149,149,149,149,149,149,151,151,152,153,152,152,152, +154,154, 46, 46, 46, 46,151,155,151,155,155,155,151,156,151,151, +157,157, 46, 46, 46, 46, 46,158, 46,159, 46, 46, 46, 46, 46, 46, +149,149,149,149,149, 46, 46, 46, 46, 46,160,160,151, 46,152, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, /* block 6 */ -154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, -154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, -154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, -154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, -154,154,155,154,154,156,154,154,154,154,154,154,154,154,154,157, -154,154,154,154,154,154,154,154,158,158,158,158,158,154,154,154, -154,154,154,159,159,159,159,159,159,159,159,159,159,159,159,159, -160,161,160,161,149,162,160,161,163,163,164,165,165,165,166,167, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +174,177,176,178,176,176,176,176,176,176,176,176,176,176,176,176, +179,176,176,180,181,179,176,176,176,176,176,176,176,182,179,176, +183,184,176,176,176,176,176,176,176,176,176,176,176,176,176,176, +176,176,185,176,176,186,176,176,176,176,176,176,176,176,176,187, +176,176,176,176,176,176,176,176,188,189,189,189,189,176,190,176, +176,176,176,191,191,191,191,191,191,191,191,191,191,191,191,191, +192,193,192,193,194,195,192,193,196,196,197,198,198,198,199,200, /* block 7 */ -163,163,163,163,162, 46,168,169,170,170,170,163,171,163,172,172, -173,174,175,174,174,176,174,174,177,178,179,174,180,174,174,174, -181,182,163,183,174,174,184,174,174,185,174,174,186,187,187,187, -173,188,189,188,188,190,188,188,191,192,193,188,194,188,188,188, 
-195,196,197,198,188,188,199,188,188,200,188,188,201,202,202,203, -204,205,206,207,207,208,209,210,160,161,160,161,160,161,160,161, -160,161,211,212,211,212,211,212,211,212,211,212,211,212,211,212, -213,214,215,216,217,218,219,160,161,220,160,161,221,222,222,222, +196,196,196,196,201, 46,202,203,204,204,204,196,205,196,206,206, +207,208,209,208,208,210,208,208,211,212,213,208,214,208,208,208, +215,216,196,217,208,208,218,208,208,219,208,208,220,221,221,221, +222,223,224,223,223,225,223,223,226,227,228,223,229,223,223,223, +230,231,232,233,223,223,234,223,223,235,223,223,236,237,237,238, +239,240,241,242,242,243,244,245,192,193,192,193,192,193,192,193, +192,193,246,247,246,247,246,247,246,247,246,247,246,247,246,247, +248,249,250,251,252,253,254,192,193,255,192,193,256,257,257,257, /* block 8 */ -223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223, -224,224,225,224,226,224,224,224,224,224,224,224,224,224,227,224, -224,228,229,224,224,224,224,224,224,224,230,224,224,224,224,224, -231,231,232,231,233,231,231,231,231,231,231,231,231,231,234,231, -231,235,236,231,231,231,231,231,231,231,237,231,231,231,231,231, -238,238,238,238,238,238,239,238,239,238,238,238,238,238,238,238, -240,241,242,243,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, +258,258,258,258,258,258,258,258,258,258,258,258,258,258,258,258, +259,259,260,259,261,259,259,259,259,259,259,259,259,259,262,259, +259,263,264,259,259,259,259,259,259,259,265,259,259,259,259,259, +266,266,267,266,268,266,266,266,266,266,266,266,266,266,269,266, +266,270,271,266,266,266,266,266,266,266,272,266,266,266,266,266, +273,273,273,273,273,273,274,273,274,273,273,273,273,273,273,273, +275,276,277,278,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, /* block 9 */ -240,241,244,245,246,247,247,246,248,248,240,241,240,241,240,241, 
-240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -249,240,241,240,241,240,241,240,241,240,241,240,241,240,241,250, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, +275,276,279,280,281,282,282,281,283,283,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +284,275,276,275,276,275,276,275,276,275,276,275,276,275,276,285, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, /* block 10 */ -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -163,251,251,251,251,251,251,251,251,251,251,251,251,251,251,251, -251,251,251,251,251,251,251,251,251,251,251,251,251,251,251,251, -251,251,251,251,251,251,251,163,163,252,253,253,253,253,253,254, -255,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256, -256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +196,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, +286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, +286,286,286,286,286,286,286,196,196,287,288,288,288,288,288,289, +290,291,291,291,291,291,291,291,291,291,291,291,291,291,291,291, 
+291,291,291,291,291,291,291,291,291,291,291,291,291,291,291,291, /* block 11 */ -256,256,256,256,256,256,256,257,255,258,259,163,163,260,260,261, -262,263,263,263,263,263,263,263,263,263,263,263,263,263,263,263, -263,263,264,263,263,263,263,263,263,263,263,263,263,263,263,263, -265,265,265,265,265,265,265,265,265,265,265,265,265,265,266,265, -267,265,265,268,265,269,267,269,262,262,262,262,262,262,262,262, -270,270,270,270,270,270,270,270,270,270,270,270,270,270,270,270, -270,270,270,270,270,270,270,270,270,270,270,262,262,262,262,270, -270,270,270,267,271,262,262,262,262,262,262,262,262,262,262,262, +291,291,291,291,291,291,291,292,290,293,294,196,196,295,295,296, +297,298,298,298,298,298,298,298,298,298,298,298,298,298,298,298, +298,298,299,298,298,298,298,298,298,298,298,298,298,298,298,298, +300,300,300,300,300,300,300,300,300,300,300,300,300,300,301,300, +302,300,300,303,300,304,302,304,297,297,297,297,297,297,297,297, +305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,305, +305,305,305,305,305,305,305,305,305,305,305,297,297,297,297,305, +305,305,305,302,306,297,297,297,297,297,297,297,297,297,297,297, /* block 12 */ -272,272,272,272,272,273,274,274,275,276,276,277,278,279,280,280, -281,281,281,281,281,281,281,281,281,281,281,282,283,284,284,285, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -287,286,286,286,286,286,286,286,286,286,286,288,288,288,288,288, -288,288,288,289,289,289,281,290,291,281,281,281,281,281,281,281, -292,292,292,292,292,292,292,292,292,292,276,293,293,279,286,286, -289,286,286,294,286,286,286,286,286,286,286,286,286,286,286,286, +307,307,307,307,307,308,309,309,310,311,311,312,313,314,315,315, +316,316,316,316,316,316,316,316,316,316,316,317,318,319,319,320, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, 
+322,321,321,321,321,321,321,321,321,321,321,323,323,323,323,323, +323,323,323,324,325,325,316,326,327,316,316,316,316,316,316,316, +328,328,328,328,328,328,328,328,328,328,311,329,329,314,321,321, +324,321,321,330,321,321,321,321,321,321,321,321,321,321,321,321, /* block 13 */ -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,295,286,281,281,281,281,281,281,281,273,280,291, -291,281,281,281,281,296,296,281,281,280,291,291,291,281,286,286, -297,297,297,297,297,297,297,297,297,297,286,286,286,298,298,286, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,331,321,316,316,316,316,316,316,332,308,315,333, +333,316,316,332,316,334,334,332,332,315,333,333,333,316,321,321, +335,335,335,335,335,335,335,335,335,335,321,321,321,336,336,321, /* block 14 */ -299,299,299,300,300,300,300,300,300,300,300,301,300,301,302,303, -304,305,304,304,304,304,304,304,304,304,304,304,304,304,304,304, -304,304,304,304,304,304,304,304,304,304,304,304,304,304,304,304, -306,306,306,306,306,306,306,306,306,306,306,306,306,306,306,306, -307,307,307,307,307,307,307,307,307,307,307,302,302,304,304,304, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, +337,337,337,338,338,338,338,338,338,338,338,339,338,339,340,341, 
+342,343,342,342,342,342,342,342,342,342,342,342,342,342,342,342, +342,342,342,342,342,342,342,342,342,342,342,342,342,342,342,342, +344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,344, +345,345,345,345,345,345,345,345,345,345,345,340,340,342,342,342, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, /* block 15 */ -308,308,308,308,308,308,308,308,308,308,308,308,308,308,308,308, -308,308,308,308,308,308,308,308,308,308,308,308,308,308,308,308, -308,308,308,308,308,308,309,309,309,309,309,309,309,309,309,309, -309,308,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -310,310,310,310,310,310,310,310,310,310,311,311,311,311,311,311, -311,311,311,311,311,311,311,311,311,311,311,311,311,311,311,311, -311,311,311,311,311,311,311,311,311,311,311,312,312,312,312,312, -312,312,312,312,313,313,314,315,316,317,318,262,262,319,320,320, +346,346,346,346,346,346,346,346,346,346,346,346,346,346,346,346, +346,346,346,346,346,346,346,346,346,346,346,346,346,346,346,346, +346,346,346,346,346,346,347,347,347,347,347,347,347,347,347,347, +347,346,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +348,348,348,348,348,348,348,348,348,348,349,349,349,349,349,349, +349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349, +349,349,349,349,349,349,349,349,349,349,349,350,350,350,350,350, +350,350,350,350,351,351,352,353,354,355,356,297,297,357,358,358, /* block 16 */ +359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359, +359,359,359,359,359,359,360,360,361,361,362,360,360,360,360,360, +360,360,360,360,362,360,360,360,362,360,360,360,360,363,297,297, +364,364,364,364,364,364,365,366,364,366,364,364,364,366,366,297, +367,367,367,367,367,367,367,367,367,367,367,367,367,367,367,367, +367,367,367,367,367,367,367,367,367,368,368,368,297,297,369,297, 
+342,342,342,342,342,342,342,342,342,342,342,340,340,340,340,340, 321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, -321,321,321,321,321,321,322,322,323,323,324,322,322,322,322,322, -322,322,322,322,324,322,322,322,324,322,322,322,322,325,262,262, -326,326,326,326,326,326,326,327,326,327,326,326,326,327,327,262, -328,328,328,328,328,328,328,328,328,328,328,328,328,328,328,328, -328,328,328,328,328,328,328,328,328,329,329,329,262,262,330,262, -304,304,304,304,304,304,304,304,304,304,304,302,302,302,302,302, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, /* block 17 */ -286,286,286,286,286,286,286,286,331,286,286,286,286,286,286,302, -272,272,302,302,302,302,302,302,291,291,291,291,291,291,291,291, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,296,291,291,291,291,291,291, -291,291,291,332,281,281,281,281,281,281,281,281,281,281,281,281, -332,332,273,290,290,290,290,290,290,290,291,291,291,291,291,291, -290,290,290,290,290,290,290,290,290,290,290,290,290,290,290,281, +321,321,321,321,321,321,321,321,370,321,321,321,321,321,321,340, +307,307,340,340,340,340,340,316,333,333,333,333,333,333,333,333, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,334,327,327,333,327,327,327, +333,333,333,371,316,316,316,316,316,316,316,316,316,316,316,316, +372,372,308,326,326,326,326,326,326,326,333,333,333,333,333,333, +326,326,326,373,326,326,326,326,326,326,326,326,326,326,326,316, /* block 18 */ -333,333,333,334,335,335,335,335,335,335,335,335,335,335,335,335, -335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335, -335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335, -335,335,335,335,335,335,335,335,335,335,333,334,336,335,334,334, 
-334,333,333,333,333,333,333,333,333,334,334,334,334,337,334,334, -335,338,339,154,154,333,333,333,335,335,335,335,335,335,335,335, -335,335,333,333,340,341,342,342,342,342,342,342,342,342,342,342, -343,344,335,335,335,335,335,335,335,335,335,335,335,335,335,335, +374,374,374,375,376,376,376,376,376,376,376,376,376,376,376,376, +376,376,376,376,376,377,377,377,377,377,377,377,377,377,377,377, +377,377,377,377,377,377,377,377,377,377,377,377,377,377,377,377, +377,377,377,377,377,377,377,377,377,377,374,375,378,376,375,375, +375,374,374,374,374,374,374,374,374,375,375,375,375,379,375,375, +376,380,381,176,176,374,374,374,377,377,377,377,377,377,377,377, +376,376,374,374,382,383,384,384,384,384,384,384,384,384,384,384, +385,386,376,376,376,376,376,376,377,377,377,377,377,377,377,377, /* block 19 */ -345,346,347,347,163,345,345,345,345,345,345,345,345,163,163,345, -345,163,163,345,345,345,345,345,345,345,345,345,345,345,345,345, -345,345,345,345,345,345,345,345,345,163,345,345,345,345,345,345, -345,163,345,163,163,163,345,345,345,345,163,163,348,345,349,347, -347,346,346,346,346,163,163,347,347,163,163,347,347,350,345,163, -163,163,163,163,163,163,163,349,163,163,163,163,345,345,163,345, -345,345,346,346,163,163,351,351,351,351,351,351,351,351,351,351, -345,345,352,352,353,353,353,353,353,353,354,352,345,355,356,163, +387,388,389,389,196,387,387,387,387,387,387,387,387,196,196,387, +387,196,196,387,387,390,390,390,390,390,390,390,390,390,390,390, +390,390,390,390,390,390,390,390,390,196,390,390,390,390,390,390, +390,196,390,196,196,196,390,390,390,390,196,196,391,387,392,389, +389,388,388,388,388,196,196,389,389,196,196,389,389,393,387,196, +196,196,196,196,196,196,196,392,196,196,196,196,390,390,196,390, +387,387,388,388,196,196,394,394,394,394,394,394,394,394,394,394, +390,390,395,395,396,396,396,396,396,396,397,395,387,398,399,196, /* block 20 */ -163,357,357,358,163,359,359,359,359,359,359,163,163,163,163,359, 
-359,163,163,359,359,359,359,359,359,359,359,359,359,359,359,359, -359,359,359,359,359,359,359,359,359,163,359,359,359,359,359,359, -359,163,359,359,163,359,359,163,359,359,163,163,360,163,358,358, -358,357,357,163,163,163,163,357,357,163,163,357,357,361,163,163, -163,357,163,163,163,163,163,163,163,359,359,359,359,163,359,163, -163,163,163,163,163,163,362,362,362,362,362,362,362,362,362,362, -357,357,359,359,359,357,363,163,163,163,163,163,163,163,163,163, +196,400,400,401,196,402,402,402,402,402,402,196,196,196,196,402, +402,196,196,402,402,402,402,402,402,402,402,402,402,402,402,402, +402,402,402,402,402,402,402,402,402,196,402,402,402,402,402,402, +402,196,402,402,196,402,402,196,402,402,196,196,403,196,401,401, +401,400,400,196,196,196,196,400,400,196,196,400,400,404,196,196, +196,400,196,196,196,196,196,196,196,402,402,402,402,196,402,196, +196,196,196,196,196,196,405,405,405,405,405,405,405,405,405,405, +400,406,402,402,402,400,407,196,196,196,196,196,196,196,196,196, /* block 21 */ -163,364,364,365,163,366,366,366,366,366,366,366,366,366,163,366, -366,366,163,366,366,366,366,366,366,366,366,366,366,366,366,366, -366,366,366,366,366,366,366,366,366,163,366,366,366,366,366,366, -366,163,366,366,163,366,366,366,366,366,163,163,367,366,365,365, -365,364,364,364,364,364,163,364,364,365,163,365,365,368,163,163, -366,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -366,366,364,364,163,163,369,369,369,369,369,369,369,369,369,369, -370,371,163,163,163,163,163,163,163,366,364,364,364,367,367,367, +196,408,408,409,196,410,410,410,410,410,410,410,410,410,196,410, +410,410,196,410,410,411,411,411,411,411,411,411,411,411,411,411, +411,411,411,411,411,411,411,411,411,196,411,411,411,411,411,411, +411,196,411,411,196,411,411,411,411,411,196,196,412,410,409,409, +409,408,408,408,408,408,196,408,408,409,196,409,409,413,196,196, +410,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +410,410,408,408,196,196,414,414,414,414,414,414,414,414,414,414, 
+415,416,196,196,196,196,196,196,196,411,408,417,408,412,412,412, /* block 22 */ -163,372,373,373,163,374,374,374,374,374,374,374,374,163,163,374, -374,163,163,374,374,374,374,374,374,374,374,374,374,374,374,374, -374,374,374,374,374,374,374,374,374,163,374,374,374,374,374,374, -374,163,374,374,163,374,374,374,374,374,163,163,375,374,376,372, -373,372,372,372,372,163,163,373,373,163,163,373,373,377,163,163, -163,163,163,163,163,378,372,376,163,163,163,163,374,374,163,374, -374,374,372,372,163,163,379,379,379,379,379,379,379,379,379,379, -380,374,381,381,381,381,381,381,163,163,163,163,163,163,163,163, +196,418,419,419,196,420,420,420,420,420,420,420,420,196,196,420, +420,196,196,420,420,421,421,421,421,421,421,421,421,421,421,421, +421,421,421,421,421,421,421,421,421,196,421,421,421,421,421,421, +421,196,421,421,196,421,421,421,421,421,196,196,422,420,423,418, +419,418,418,418,418,196,196,419,419,196,196,419,419,424,196,196, +196,196,196,196,196,425,418,423,196,196,196,196,421,421,196,421, +420,420,418,418,196,196,426,426,426,426,426,426,426,426,426,426, +427,421,428,428,428,428,428,428,196,196,196,196,196,196,196,196, /* block 23 */ -163,163,382,383,163,383,383,383,383,383,383,163,163,163,383,383, -383,163,383,383,383,383,163,163,163,383,383,163,383,163,383,383, -163,163,163,383,383,163,163,163,383,383,383,163,163,163,383,383, -383,383,383,383,383,383,383,383,383,383,163,163,163,163,384,385, -382,385,385,163,163,163,385,385,385,163,385,385,385,386,163,163, -383,163,163,163,163,163,163,384,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,387,387,387,387,387,387,387,387,387,387, -388,388,388,389,390,390,390,390,390,391,390,163,163,163,163,163, +196,196,429,430,196,430,430,430,430,430,430,196,196,196,430,430, +430,196,430,430,430,430,196,196,196,430,430,196,430,196,430,430, +196,196,196,430,430,196,196,196,430,430,430,196,196,196,430,430, +430,430,430,430,430,430,430,430,430,430,196,196,196,196,431,432, 
+429,432,432,196,196,196,432,432,432,196,432,432,432,433,196,196, +430,196,196,196,196,196,196,431,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,434,434,434,434,434,434,434,434,434,434, +435,435,435,436,437,437,437,437,437,438,437,196,196,196,196,196, /* block 24 */ -392,393,393,393,392,394,394,394,394,394,394,394,394,163,394,394, -394,163,394,394,394,394,394,394,394,394,394,394,394,394,394,394, -394,394,394,394,394,394,394,394,394,163,394,394,394,394,394,394, -394,394,394,394,394,394,394,394,394,394,163,163,395,394,392,392, -392,393,393,393,393,163,392,392,392,163,392,392,392,396,163,163, -163,163,163,163,163,392,392,163,394,394,394,163,163,394,163,163, -394,394,392,392,163,163,397,397,397,397,397,397,397,397,397,397, -163,163,163,163,163,163,163,398,399,399,399,399,399,399,399,400, +439,440,440,440,439,441,441,441,441,441,441,441,441,196,441,441, +441,196,441,441,441,442,442,442,442,442,442,442,442,442,442,442, +442,442,442,442,442,442,442,442,442,196,442,442,442,442,442,442, +442,442,442,442,442,442,442,442,442,442,196,196,443,441,439,439, +439,440,440,440,440,196,439,439,439,196,439,439,439,444,196,196, +196,196,196,196,196,439,439,196,442,442,442,196,196,441,196,196, +441,441,439,439,196,196,445,445,445,445,445,445,445,445,445,445, +196,196,196,196,196,196,196,446,447,447,447,447,447,447,447,448, /* block 25 */ -401,402,403,403,404,401,401,401,401,401,401,401,401,163,401,401, -401,163,401,401,401,401,401,401,401,401,401,401,401,401,401,401, -401,401,401,401,401,401,401,401,401,163,401,401,401,401,401,401, -401,401,401,401,163,401,401,401,401,401,163,163,405,401,403,406, -403,403,407,403,403,163,406,403,403,163,403,403,402,408,163,163, -163,163,163,163,163,407,407,163,163,163,163,163,163,401,401,163, -401,401,402,402,163,163,409,409,409,409,409,409,409,409,409,409, -163,401,401,403,163,163,163,163,163,163,163,163,163,163,163,163, +449,450,451,451,452,449,449,449,449,449,449,449,449,196,449,449, 
+449,196,449,449,449,449,449,449,449,449,449,449,449,449,449,449, +449,449,449,449,449,449,449,449,449,196,449,449,449,449,449,449, +449,449,449,449,196,449,449,449,449,449,196,196,453,449,451,454, +455,451,455,451,451,196,454,455,455,196,455,455,450,456,196,196, +196,196,196,196,196,455,455,196,196,196,196,196,196,449,449,196, +449,449,450,450,196,196,457,457,457,457,457,457,457,457,457,457, +196,449,449,451,196,196,196,196,196,196,196,196,196,196,196,196, /* block 26 */ -410,410,411,411,412,412,412,412,412,412,412,412,412,163,412,412, -412,163,412,412,412,412,412,412,412,412,412,412,412,412,412,412, -412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,412, -412,412,412,412,412,412,412,412,412,412,412,413,413,412,414,411, -411,410,410,410,410,163,411,411,411,163,411,411,411,413,415,416, -163,163,163,163,412,412,412,414,417,417,417,417,417,417,417,412, -412,412,410,410,163,163,418,418,418,418,418,418,418,418,418,418, -417,417,417,417,417,417,417,417,417,416,412,412,412,412,412,412, +458,458,459,459,460,460,460,460,460,460,460,460,460,196,460,460, +460,196,460,460,460,461,461,461,461,461,461,461,461,461,461,461, +461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, +461,461,461,461,461,461,461,461,461,461,461,462,462,460,463,459, +459,458,458,458,458,196,459,459,459,196,459,459,459,462,464,465, +196,196,196,196,460,460,460,463,466,466,466,466,466,466,466,460, +460,460,458,458,196,196,467,467,467,467,467,467,467,467,467,467, +466,466,466,466,466,466,466,466,466,465,460,460,460,460,460,460, /* block 27 */ -163,419,420,420,163,421,421,421,421,421,421,421,421,421,421,421, -421,421,421,421,421,421,421,163,163,163,421,421,421,421,421,421, -421,421,421,421,421,421,421,421,421,421,421,421,421,421,421,421, -421,421,163,421,421,421,421,421,421,421,421,421,163,421,163,163, -421,421,421,421,421,421,421,163,163,163,422,163,163,163,163,423, -420,420,419,419,419,163,419,163,420,420,420,420,420,420,420,423, 
-163,163,163,163,163,163,424,424,424,424,424,424,424,424,424,424, -163,163,420,420,425,163,163,163,163,163,163,163,163,163,163,163, +196,468,469,469,196,470,470,470,470,470,470,470,470,470,470,470, +470,470,470,470,470,470,470,196,196,196,470,470,470,470,470,470, +470,470,470,470,470,470,470,470,470,470,470,470,470,470,470,470, +470,470,196,470,470,470,470,470,470,470,470,470,196,470,196,196, +470,470,470,470,470,470,470,196,196,196,471,196,196,196,196,472, +469,469,468,468,468,196,468,196,469,469,469,469,469,469,469,472, +196,196,196,196,196,196,473,473,473,473,473,473,473,473,473,473, +196,196,469,469,474,196,196,196,196,196,196,196,196,196,196,196, /* block 28 */ -163,426,426,426,426,426,426,426,426,426,426,426,426,426,426,426, -426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,426, -426,426,426,426,426,426,426,426,426,426,426,426,426,426,426,426, -426,427,426,428,427,427,427,427,427,427,429,163,163,163,163,430, -431,431,431,431,431,426,432,433,433,433,433,433,433,427,433,434, -435,435,435,435,435,435,435,435,435,435,436,436,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +196,475,475,475,475,475,475,475,475,475,475,475,475,475,475,475, +475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,475, +475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,475, +475,476,475,477,476,476,476,476,476,476,478,196,196,196,196,479, +480,480,480,480,480,475,481,482,482,482,482,482,482,476,482,483, +484,484,484,484,484,484,484,484,484,484,485,485,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 29 */ -163,437,437,163,437,163,437,437,437,437,437,163,437,437,437,437, -437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, -437,437,437,437,163,437,163,437,437,437,437,437,437,437,437,437, 
-437,438,437,439,438,438,438,438,438,438,440,438,438,437,163,163, -441,441,441,441,441,163,442,163,443,443,443,443,443,438,444,163, -445,445,445,445,445,445,445,445,445,445,163,163,437,437,437,437, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +196,486,486,196,486,196,486,486,486,486,486,196,486,486,486,486, +486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486, +486,486,486,486,196,486,196,486,486,486,486,486,486,486,486,486, +486,487,486,488,487,487,487,487,487,487,489,487,487,486,196,196, +490,490,490,490,490,196,491,196,492,492,492,492,492,487,493,196, +494,494,494,494,494,494,494,494,494,494,196,196,486,486,486,486, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 30 */ -446,447,447,447,448,448,448,448,449,448,448,448,448,449,449,449, -449,449,449,447,448,447,447,447,450,450,447,447,447,447,447,447, -451,451,451,451,451,451,451,451,451,451,452,452,452,452,452,452, -452,452,452,452,447,450,447,450,447,450,453,454,453,454,455,455, -446,446,446,446,446,446,446,446,163,446,446,446,446,446,446,446, -446,446,446,446,446,446,446,446,446,446,446,446,446,446,446,446, -446,446,446,446,446,446,446,446,446,446,446,446,446,163,163,163, -163,456,456,456,456,456,456,457,456,457,456,456,456,456,456,458, +495,496,496,496,497,497,497,497,498,497,497,497,497,498,498,498, +498,498,498,496,497,496,496,496,499,499,496,496,496,496,496,496, +500,500,500,500,500,500,500,500,500,500,501,501,501,501,501,501, +501,501,501,501,496,499,496,499,496,499,502,503,502,503,504,504, +495,495,495,495,495,495,495,495,196,495,495,495,495,495,495,495, +495,495,495,495,495,495,495,495,495,495,495,495,495,495,495,495, +495,495,495,495,495,495,495,495,495,495,495,495,495,196,196,196, +196,505,505,505,505,505,505,506,505,506,505,505,505,505,505,507, /* block 31 */ 
-456,456,459,459,460,448,450,450,446,446,446,446,446,456,456,456, -456,456,456,456,456,456,456,456,163,456,456,456,456,456,456,456, -456,456,456,456,456,456,456,456,456,456,456,456,456,456,456,456, -456,456,456,456,456,456,456,456,456,456,456,456,456,163,447,447, -447,447,447,447,447,447,450,447,447,447,447,447,447,163,447,447, -448,448,448,448,448,461,461,461,461,448,448,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +505,505,508,508,509,497,499,499,495,495,495,495,495,505,505,505, +505,505,505,505,505,505,505,505,196,505,505,505,505,505,505,505, +505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505, +505,505,505,505,505,505,505,505,505,505,505,505,505,196,496,496, +496,496,496,496,496,496,499,496,496,496,496,496,496,196,496,496, +497,497,497,497,497,510,510,510,510,497,497,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 32 */ -462,462,462,462,462,462,462,462,462,462,462,462,462,462,462,462, -462,462,462,462,462,462,462,462,462,462,462,462,462,462,462,462, -462,462,462,462,462,462,462,462,462,462,462,463,463,464,464,464, -464,465,464,464,464,464,464,466,463,467,467,465,465,464,464,462, -468,468,468,468,468,468,468,468,468,468,469,469,470,470,470,470, -462,462,462,462,462,462,465,465,464,464,462,462,462,462,464,464, -464,462,463,471,471,462,462,463,463,471,471,471,471,471,462,462, -462,464,464,464,464,462,462,462,462,462,462,462,462,462,462,462, +511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, +511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, +511,511,511,511,511,511,511,511,511,511,511,512,512,513,513,513, +513,514,513,513,513,513,513,515,512,516,516,514,514,513,513,511, +517,517,517,517,517,517,517,517,517,517,518,518,519,519,519,519, +511,511,511,511,511,511,514,514,513,513,511,511,511,511,513,513, 
+513,511,512,520,520,511,511,512,512,520,520,520,520,520,511,511, +511,513,513,513,513,511,511,511,511,511,511,511,511,511,511,511, /* block 33 */ -462,462,464,463,465,464,464,471,471,471,471,471,471,472,462,471, -473,473,473,473,473,473,473,473,473,473,471,471,463,464,474,474, -475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,475, -475,475,475,475,475,475,475,475,475,475,475,475,475,475,475,475, -475,475,475,475,475,475,163,475,163,163,163,163,163,475,163,163, -476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476, -476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476, -476,476,476,476,476,476,476,476,476,476,476,477,478,476,476,476, +511,511,513,512,514,513,513,520,520,520,520,520,520,521,511,520, +522,522,522,522,522,522,522,522,522,522,520,520,512,513,523,523, +524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524, +524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524, +524,524,524,524,524,524,196,524,196,196,196,196,196,524,196,196, +525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,525, +525,525,525,525,525,525,525,525,525,525,525,525,525,525,525,525, +525,525,525,525,525,525,525,525,525,525,525,526,527,525,525,525, /* block 34 */ -479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, -479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, -479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, -479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, -479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, -479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,480, -481,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, +528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, +528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, +528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, 
+528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, +528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, +528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,529, +530,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531, +531,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531, /* block 35 */ -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, -482,482,482,482,482,482,482,482,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, +531,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531, +531,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531, +531,531,531,531,531,531,531,531,532,532,532,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,532,532,532,532, /* block 36 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,163,484,484,484,484,163,163, -484,484,484,484,484,484,484,163,484,163,484,484,484,484,163,163, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, 
+533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,196,533,533,533,533,196,196, +533,533,533,533,533,533,533,196,533,196,533,533,533,533,196,196, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, /* block 37 */ -484,484,484,484,484,484,484,484,484,163,484,484,484,484,163,163, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,163,484,484,484,484,163,163,484,484,484,484,484,484,484,163, -484,163,484,484,484,484,163,163,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,163,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, +533,533,533,533,533,533,533,533,533,196,533,533,533,533,196,196, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,196,533,533,533,533,196,196,533,533,533,533,533,533,533,196, +533,196,533,533,533,533,196,196,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,196,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, /* block 38 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,163,484,484,484,484,163,163,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, 
-484,484,484,484,484,484,484,484,484,484,484,163,163,485,485,485, -486,487,488,487,487,487,487,488,488,489,489,489,489,489,489,489, -489,489,490,490,490,490,490,490,490,490,490,490,490,163,163,163, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,196,533,533,533,533,196,196,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,533,533,533,533,196,196,534,534,534, +535,536,537,536,536,536,536,537,537,538,538,538,538,538,538,538, +538,538,539,539,539,539,539,539,539,539,539,539,539,196,196,196, /* block 39 */ -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -491,491,491,491,491,491,491,491,491,491,163,163,163,163,163,163, -492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, -492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, -492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, -492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, -492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492, -493,493,493,493,493,493,163,163,494,494,494,494,494,494,163,163, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +540,540,540,540,540,540,540,540,540,540,196,196,196,196,196,196, +541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, +541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, +541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, +541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, +541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, +542,542,542,542,542,542,196,196,543,543,543,543,543,543,196,196, /* blockblockblock 42 */ -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, 
-496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,497,498,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,546,547,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, /* block 43 */ -499,500,500,500,500,500,500,500,500,500,500,500,500,500,500,500, -500,500,500,500,500,500,500,500,500,500,500,501,502,163,163,163, -503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503, -503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503, -503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503, -503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503, -503,503,503,503,503,503,503,503,503,503,503,504,504,504,505,505, -505,503,503,503,503,503,503,503,503,163,163,163,163,163,163,163, +548,549,549,549,549,549,549,549,549,549,549,549,549,549,549,549, +549,549,549,549,549,549,549,549,549,549,549,550,551,196,196,196, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, +552,552,552,552,552,552,552,552,552,552,552,553,553,553,554,554, +554,552,552,552,552,552,552,552,552,196,196,196,196,196,196,196, /* 
block 44 */ -506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,506, -506,506,507,507,508,509,163,163,163,163,163,163,163,163,163,506, -510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, -510,510,511,511,512,513,513,163,163,163,163,163,163,163,163,163, -514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514, -514,514,515,515,163,163,163,163,163,163,163,163,163,163,163,163, -516,516,516,516,516,516,516,516,516,516,516,516,516,163,516,516, -516,163,517,517,163,163,163,163,163,163,163,163,163,163,163,163, +555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555, +555,555,556,556,557,558,196,196,196,196,196,196,196,196,196,555, +559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, +559,559,560,560,561,562,562,196,196,196,196,196,196,196,196,196, +563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563, +563,563,564,564,196,196,196,196,196,196,196,196,196,196,196,196, +565,565,565,565,565,565,565,565,565,565,565,565,565,196,565,565, +565,196,566,566,196,196,196,196,196,196,196,196,196,196,196,196, /* block 45 */ -518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, -518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518, -518,518,518,519,519,518,518,518,518,518,518,518,518,518,518,518, -518,518,518,518,520,520,521,522,522,522,522,522,522,522,521,521, -521,521,521,521,521,521,522,521,521,523,523,523,523,523,523,523, -523,523,524,523,525,525,525,526,527,527,525,528,518,523,163,163, -529,529,529,529,529,529,529,529,529,529,163,163,163,163,163,163, -530,530,530,530,530,530,530,530,530,530,163,163,163,163,163,163, +567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567, +567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567, +567,567,567,568,568,567,567,567,567,567,567,567,567,567,567,567, +567,567,567,567,569,569,570,571,571,571,571,571,571,571,570,570, +570,570,570,570,570,570,571,570,570,572,572,572,572,572,572,572, 
+572,572,573,572,574,574,575,576,577,577,575,578,567,572,196,196, +579,579,579,579,579,579,579,579,579,579,196,196,196,196,196,196, +580,580,580,580,580,580,580,580,580,580,196,196,196,196,196,196, /* block 46 */ -531,531,532,533,534,532,535,531,534,536,537,538,538,538,539,538, -540,540,540,540,540,540,540,540,540,540,163,163,163,163,163,163, -541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, -541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, -541,541,541,542,541,541,541,541,541,541,541,541,541,541,541,541, -541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, -541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, -541,541,541,541,541,541,541,541,541,163,163,163,163,163,163,163, +581,581,582,583,584,582,585,581,584,586,587,588,588,588,589,588, +590,590,590,590,590,590,590,590,590,590,196,196,196,196,196,196, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,592,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,196,196,196,196,196,196,196, /* block 47 */ -541,541,541,541,541,543,543,541,541,541,541,541,541,541,541,541, -541,541,541,541,541,541,541,541,541,541,541,541,541,541,541,541, -541,541,541,541,541,541,541,541,541,544,541,163,163,163,163,163, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -496,496,496,496,496,496,163,163,163,163,163,163,163,163,163,163, +591,591,591,591,591,593,593,591,591,591,591,591,591,591,591,591, +591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591, 
+591,591,591,591,591,591,591,591,591,594,591,196,196,196,196,196, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +545,545,545,545,545,545,196,196,196,196,196,196,196,196,196,196, /* block 48 */ -545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, -545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,163, -546,546,546,547,547,547,547,546,546,547,547,547,163,163,163,163, -547,547,546,547,547,547,547,547,547,548,548,548,163,163,163,163, -549,163,163,163,550,550,551,551,551,551,551,551,551,551,551,551, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552, -552,552,552,552,552,552,552,552,552,552,552,552,552,552,163,163, -552,552,552,552,552,163,163,163,163,163,163,163,163,163,163,163, +595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595, +595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,196, +596,596,596,597,597,597,597,596,596,597,597,597,196,196,196,196, +597,597,596,597,597,597,597,597,597,598,598,598,196,196,196,196, +599,196,196,196,600,600,601,601,601,601,601,601,601,601,601,601, +602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602, +602,602,602,602,602,602,602,602,602,602,602,602,602,602,196,196, +602,602,602,602,602,196,196,196,196,196,196,196,196,196,196,196, /* block 49 */ -553,553,553,553,553,553,553,553,553,553,553,553,553,553,553,553, -553,553,553,553,553,553,553,553,553,553,553,553,553,553,553,553, -553,553,553,553,553,553,553,553,553,553,553,553,163,163,163,163, -553,553,553,553,553,554,554,554,553,553,554,553,553,553,553,553, -553,553,553,553,553,553,553,553,553,553,163,163,163,163,163,163, -555,555,555,555,555,555,555,555,555,555,556,163,163,163,557,557, -558,558,558,558,558,558,558,558,558,558,558,558,558,558,558,558, 
-558,558,558,558,558,558,558,558,558,558,558,558,558,558,558,558, +603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603, +603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603, +603,603,603,603,603,603,603,603,603,603,603,603,196,196,196,196, +603,603,603,603,603,604,604,604,603,603,604,603,603,603,603,603, +603,603,603,603,603,603,603,603,603,603,196,196,196,196,196,196, +605,605,605,605,605,605,605,605,605,605,606,196,196,196,607,607, +608,608,608,608,608,608,608,608,608,608,608,608,608,608,608,608, +608,608,608,608,608,608,608,608,608,608,608,608,608,608,608,608, /* block 50 */ -559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559, -559,559,559,559,559,559,559,560,560,561,561,560,163,163,562,562, -563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563, -563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563, -563,563,563,563,563,563,563,563,563,563,563,563,563,563,563,563, -563,563,563,563,563,564,565,564,565,565,565,565,565,565,565,163, -566,567,565,567,567,565,565,565,565,565,565,565,565,564,564,564, -564,564,564,565,565,568,568,568,568,568,568,568,568,163,163,568, +609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,609, +609,609,609,609,609,609,609,610,610,611,611,610,196,196,612,612, +613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613, +613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613, +613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613, +613,613,613,613,613,614,615,614,615,615,615,615,615,615,615,196, +616,617,615,617,617,615,615,615,615,615,615,615,615,614,614,614, +614,614,614,615,615,618,618,618,618,618,618,618,618,196,196,618, /* block 51 */ -569,569,569,569,569,569,569,569,569,569,163,163,163,163,163,163, -569,569,569,569,569,569,569,569,569,569,163,163,163,163,163,163, -570,570,570,570,570,570,570,571,572,572,572,572,570,570,163,163, -154,154,154,154,154,154,154,154,154,154,154,154,154,154,573,574, 
-574,154,154,154,154,154,154,154,154,154,154,154,574,574,574,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +619,619,619,619,619,619,619,619,619,619,196,196,196,196,196,196, +619,619,619,619,619,619,619,619,619,619,196,196,196,196,196,196, +620,620,620,620,620,620,620,621,622,622,622,622,620,620,196,196, +176,176,176,176,176,176,176,176,176,176,176,176,176,176,623,624, +624,176,176,176,176,176,176,176,176,176,176,176,624,624,624,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 52 */ -575,575,575,575,576,577,577,577,577,577,577,577,577,577,577,577, -577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, -577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577, -577,577,577,577,578,579,575,575,575,575,575,576,575,576,576,576, -576,576,575,576,580,577,577,577,577,577,577,577,577,163,163,163, -581,581,581,581,581,581,581,581,581,581,582,582,583,584,582,582, -583,585,585,585,585,585,585,585,585,585,585,578,578,578,578,578, -578,578,578,578,585,585,585,585,585,585,585,585,585,582,582,163, +625,625,625,625,626,627,627,627,627,627,627,627,627,627,627,627, +627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,627, +627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,627, +627,627,627,627,628,629,625,625,625,625,625,629,625,629,626,626, +626,626,625,629,630,627,627,627,627,627,627,627,627,196,631,631, +632,632,632,632,632,632,632,632,632,632,631,631,633,634,631,631, +633,635,635,635,635,635,635,635,635,635,635,628,628,628,628,628, +628,628,628,628,635,635,635,635,635,635,635,635,635,631,631,631, /* block 53 */ -586,586,587,588,588,588,588,588,588,588,588,588,588,588,588,588, 
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588, -588,587,586,586,586,586,587,587,586,586,589,590,586,586,588,588, -591,591,591,591,591,591,591,591,591,591,588,588,588,588,588,588, -592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, -592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592, -592,592,592,592,592,592,593,594,595,595,594,594,594,595,594,595, -595,595,596,596,163,163,163,163,163,163,163,163,597,597,597,597, +636,636,637,638,638,638,638,638,638,638,638,638,638,638,638,638, +638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,638, +638,637,636,636,636,636,637,637,636,636,639,640,636,636,638,638, +641,641,641,641,641,641,641,641,641,641,638,638,638,638,638,638, +642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642, +642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642, +642,642,642,642,642,642,643,644,645,645,644,644,644,645,644,645, +645,645,646,646,196,196,196,196,196,196,196,196,647,647,647,647, /* block 54 */ -598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, -598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598, -598,598,598,598,599,599,599,599,599,599,599,599,600,600,600,600, -600,600,600,600,599,599,601,602,163,163,163,603,603,604,604,604, -605,605,605,605,605,605,605,605,605,605,163,163,163,598,598,598, -606,606,606,606,606,606,606,606,606,606,607,607,607,607,607,607, -607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607, -607,607,607,607,607,607,607,607,608,608,608,609,608,608,610,610, +648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648, +648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648, +648,648,648,648,649,649,649,649,649,649,649,649,650,650,650,650, +650,650,650,650,649,649,651,652,196,196,196,653,653,654,654,654, +655,655,655,655,655,655,655,655,655,655,196,196,196,648,648,648, +656,656,656,656,656,656,656,656,656,656,657,657,657,657,657,657, +657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657, 
+657,657,657,657,657,657,657,657,658,658,658,659,658,658,660,660, /* block 55 */ -611,612,613,614,615,616,617,618,619,163,163,163,163,163,163,163, -620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620, -620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620, -620,620,620,620,620,620,620,620,620,620,620,163,163,620,620,620, -621,621,621,621,621,621,621,621,163,163,163,163,163,163,163,163, -622,623,622,624,623,625,625,626,625,626,627,623,626,626,623,623, -626,628,623,623,623,623,623,623,623,629,630,631,631,625,631,631, -631,631,632,633,634,630,630,635,636,636,637,163,163,163,163,163, +661,662,663,664,665,666,667,668,669,275,276,196,196,196,196,196, +670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670, +670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670, +670,670,670,670,670,670,670,670,670,670,670,196,196,670,670,670, +671,671,671,671,671,671,671,671,196,196,196,196,196,196,196,196, +672,673,672,674,673,675,675,676,675,676,677,673,676,676,673,673, +676,678,673,673,673,673,673,673,673,679,680,681,681,675,681,681, +681,681,682,683,684,680,680,685,686,686,687,196,196,196,196,196, /* block 56 */ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, - 70, 70, 70, 70, 70, 70,221,221,221,221,221,638,147,147,147,147, -147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147, -147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147, -147,147,147,147,147,147,147,147,147,147,147,147,147,639,639,639, -639,639,148,147,147,147,639,639,639,639,639, 70, 70, 70, 70, 70, - 70, 70, 70, 70, 70, 70, 70, 70,640,641, 70, 70, 70,642, 70, 70, + 70, 70, 70, 70, 70, 70,256,256,256,256,256,688,149,149,149,149, +149,149,149,149,149,149,149,149,149,149,149,149,149,149,149,149, +149,149,149,149,149,149,149,149,149,149,149,149,149,149,149,149, +149,149,149,149,149,149,149,149,149,149,149,149,149,689,689,689, +689,689,150,149,149,149,689,689,689,689,689, 70, 70, 70, 
70, 70, + 70, 70, 70, 70, 70, 70, 70, 70,690,691, 70, 70, 70,692, 70, 70, /* block 57 */ - 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,643, 70, - 70, 70, 70, 70, 70, 70,644, 70, 70, 70, 70,645,645,645,645,645, -645,645,645,645,646,645,645,645,646,645,645,645,645,645,645,645, -645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,647, -648,648,158,158,154,154,154,154,154,154,154,154,154,154,154,154, -158,158,158,158,158,158,158,158,158,158,158,158,158,158,158,158, -158,158,158,158,158,158,158,574,574,574,574,574,574,574,574,574, -574,574,574,574,574,154,154,154,649,154,650,154,154,154,154,154, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,693, 70, + 70, 70, 70, 70, 70, 70,694, 70, 70, 70, 70,695,695,695,695,695, +695,695,695,695,696,695,695,695,696,695,695,695,695,695,695,695, +695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,697, +698,698,189,189,176,176,176,176,176,176,176,176,176,176,176,176, +189,189,189,624,624,624,624,624,624,624,624,624,624,624,624,624, +624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,624, +624,624,624,624,624,176,176,176,699,176,700,176,176,176,176,176, /* block 58 */ 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, @@ -2986,12 +3230,12 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 78080 bytes, block = 128 */ 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, -651,652, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, +701,702, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, /* block 59 */ 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, - 65, 66, 65, 66, 65, 66, 69, 69, 69, 69,653,654, 70, 70,655, 70, + 65, 66, 65, 66, 65, 66, 69, 69, 69, 69,703,704, 70, 70,705, 70, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 
66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 67, 65, 66, 65, 66, @@ -3000,123 +3244,123 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 78080 bytes, block = 128 */ 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, /* block 60 */ -656,656,656,656,656,656,656,656,657,657,657,657,657,657,657,657, -656,656,656,656,656,656,163,163,657,657,657,657,657,657,163,163, -656,656,656,656,656,656,656,656,657,657,657,657,657,657,657,657, -656,656,656,656,656,656,656,656,657,657,657,657,657,657,657,657, -656,656,656,656,656,656,163,163,657,657,657,657,657,657,163,163, -173,656,173,656,173,656,173,656,163,657,163,657,163,657,163,657, -656,656,656,656,656,656,656,656,657,657,657,657,657,657,657,657, -658,658,659,659,659,659,660,660,661,661,662,662,663,663,163,163, +706,706,706,706,706,706,706,706,707,707,707,707,707,707,707,707, +706,706,706,706,706,706,196,196,707,707,707,707,707,707,196,196, +706,706,706,706,706,706,706,706,707,707,707,707,707,707,707,707, +706,706,706,706,706,706,706,706,707,707,707,707,707,707,707,707, +706,706,706,706,706,706,196,196,707,707,707,707,707,707,196,196, +708,706,708,706,708,706,708,706,196,707,196,707,196,707,196,707, +706,706,706,706,706,706,706,706,707,707,707,707,707,707,707,707, +709,709,710,710,710,710,711,711,712,712,713,713,714,714,196,196, /* block 61 */ -664,664,664,664,664,664,664,664,665,665,665,665,665,665,665,665, -664,664,664,664,664,664,664,664,665,665,665,665,665,665,665,665, -664,664,664,664,664,664,664,664,665,665,665,665,665,665,665,665, -656,656,666,667,666,163,173,666,657,657,668,668,669,162,670,162, -162,162,666,667,666,163,173,666,671,671,671,671,669,162,162,162, -656,656,173,173,163,163,173,173,657,657,672,672,163,162,162,162, -656,656,173,173,173,215,173,173,657,657,673,673,220,162,162,162, -163,163,666,667,666,163,173,666,674,674,675,675,669,162,162,163, +715,715,715,715,715,715,715,715,716,716,716,716,716,716,716,716, 
+715,715,715,715,715,715,715,715,716,716,716,716,716,716,716,716, +715,715,715,715,715,715,715,715,716,716,716,716,716,716,716,716, +706,706,717,718,717,196,708,717,707,707,719,719,720,201,721,201, +201,201,717,718,717,196,708,717,722,722,722,722,720,201,201,201, +706,706,708,723,196,196,708,708,707,707,724,724,196,201,201,201, +706,706,708,725,708,250,708,708,707,707,726,726,255,201,201,201, +196,196,717,718,717,196,708,717,727,727,728,728,720,201,201,196, /* block 62 */ -676,676,676,676,676,676,676,676,676,676,676, 51,677,678,679,680, -681,681,681,681,681,681,682, 43,683,684,685,686,686,687,685,686, - 43, 43, 43, 43,688, 43, 43,688,689,690,691,692,693,694,695,696, -697,697,698,698,698, 43, 43, 43, 43, 49, 57, 43,699,700, 43,701, -702, 43, 43, 43,703,704,705,700,700,699, 43, 43, 43, 43, 43, 43, - 43, 43, 50,706,701, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,676, - 51,707,707,707,707,708,709,710,711,712,713,713,713,713,713,713, - 54,646,163,163, 54, 54, 54, 54, 54, 54,714,715,716,717,718,645, +729,729,729,729,729,729,729,729,729,729,729, 51,730,731,732,733, +734,734,734,734,734,734,735, 43,736,737,738,739,739,740,738,739, + 43, 43, 43, 43,741, 43, 43,742,743,744,745,746,747,748,749,750, +751,751,752,752,752, 43, 43, 43, 43, 49, 57, 43,753,754, 43,755, +756, 43, 43, 43,757,758,759,754,754,753, 43, 43, 43, 43, 43,760, + 43, 43, 50,761,755, 43, 43, 43, 43, 43,762, 43, 43,763, 43,729, + 51,764,764,764,764,765,766,767,768,769,770,770,770,770,770,770, + 54,696,196,196, 54, 54, 54, 54, 54, 54,771,772,773,774,775,695, /* block 63 */ - 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,714,715,716,717,718,163, -645,645,645,645,645,645,645,645,645,645,645,645,645,163,163,163, -430,430,430,430,430,430,430,430,430,430,430,430,430,430,430,430, -430,430,430,430,430,430,430,430,430,430,430,430,430,430,430,430, -430,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719, -720,720,720,720,720,720,720,720,720,720,720,720,720,721,721,721, 
-721,720,721,722,721,720,720,158,158,158,158,720,720,720,720,720, -723,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,771,772,773,774,775,196, +695,695,695,695,695,695,695,695,695,695,695,695,695,196,196,196, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, +479,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776, +777,777,777,777,777,777,777,777,777,777,777,777,777,778,778,778, +778,777,778,779,778,777,777,189,189,189,189,777,777,777,777,777, +780,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 64 */ -724,724,725,724,724,724,724,725,724,724,726,725,725,725,726,726, -725,725,725,726,724,725,724,724,727,725,725,725,725,725,724,724, -724,724,728,724,725,724,729,724,725,730,731,732,725,725,733,726, -725,725,734,725,726,735,735,735,735,736,724,724,726,726,725,725, -716,716,716,716,716,725,726,726,737,737,724,716,724,724,738,461, +781,781,782,781,781,781,781,782,781,781,783,782,782,782,783,783, +782,782,782,783,781,782,781,781,784,782,782,782,782,782,781,781, +781,781,785,781,782,781,786,781,782,787,788,789,782,782,790,783, +782,782,791,782,783,792,792,792,792,793,781,781,783,783,782,782, +794,794,794,794,794,782,783,783,795,795,781,794,781,781,796,510, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, -739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739, -740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,740, +797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797, +798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798, /* block 65 */ -741,741,741, 65, 66,741,741,741,741, 58,724,724,163,163,163,163, - 50, 50, 50, 50,742,743,743,743,743,743, 50, 50,744,744,744,744, - 50,744,744, 50,744,744, 50,744, 45,743,743,744,744,744, 50, 45, -744,744, 45, 45, 45, 45,744,744, 45, 45, 45, 45,744,744,744,744, -744,744,744,744,744,744,744,744,744,744,744,744,744,744, 
50, 50, -744,744, 50,744, 50,744,744,744,744,744,744,744, 45,744, 45, 45, - 45, 45, 45, 45,744,744, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, +799,799,799, 65, 66,799,799,799,799, 58,781,781,196,196,196,196, + 50, 50, 50, 50,800,801,801,801,801,801, 50, 50,802,802,802,802, + 50,802,802, 50,802,802, 50,802, 45,801,801,802,802,802, 50, 45, +802,802, 45, 45, 45, 45,802,802, 45, 45, 45, 45,802,802,802,802, +802,802,802,802,802,802,802,802,802,802,802,802,802,802, 50, 50, +802,802, 50,802, 50,802,802,802,802,802,802,802, 45,802, 45, 45, + 45, 45, 45, 45,802,802, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, /* block 66 */ - 50, 50, 50, 50, 50, 50, 50, 50,745,745,745,745,745,745, 50, 50, - 50, 50,746, 53, 50,745, 50, 50, 50, 50, 50, 50, 50, 50, 50,745, -745,745,745, 50,745, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,745,745, 50, 50, - 50, 50, 50,745, 50,745, 50, 50, 50, 50, 50, 50,745, 50, 50, 50, - 50, 50,745,745,745,745, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50,745,745,745,745,745,745,745,745, 50, 50,745,745, -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, + 50, 50,803, 50, 50, 50, 50,803,804,804,804,804,804,804, 50, 50, + 50, 50,805, 53, 50,804, 50, 50, 50, 50, 50, 50, 50, 50,803,804, +804,804,804, 50,804, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,804,804, 50, 50, + 50, 50, 50,804, 50,804, 50, 50, 50, 50, 50, 50,804, 50, 50, 50, + 50, 50,804,804,804,804, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50,804,804,804,804,804,804,804,804, 50, 50,804,804, +804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804, /* block 67 */ -745,745,745,745,745,745,745,745,745,745,745,745, 50, 50, 50,745, -745,745,745, 50, 50, 50, 50, 50,745, 50, 50, 50, 50, 50, 50, 50, - 50, 50,745,745, 50, 50,745, 50,745,745, 50,745, 50, 50, 50, 50, -745,745,745,745,745,745,745,745,745, 50, 50, 50, 
50, 50, 50, 50, - 50, 50, 50, 50, 50, 50, 50, 50, 50,745,745,745,745,745, 50, 50, -745,745, 50, 50, 50, 50,745,745,745,745,745,745,745,745,745,745, -745,745,745,745,745,745,745,745,745,745,745,745,745,745, 50, 50, -745,745,745,745,745, 50,745,745, 50, 50,745,745,745,745,745, 50, +804,804,804,804,804,804,804,804,804,804,804,804, 50, 50, 50,804, +804,804,804, 50, 50, 50, 50, 50,804, 50, 50, 50, 50, 50, 50, 50, + 50, 50,804,804, 50, 50,804, 50,804,804, 50,804, 50, 50, 50, 50, +804,804,804,804,804,804,804,804,804, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50,804,804,804,804,804, 50, 50, +804,804, 50, 50, 50, 50,804,804,804,804,804,804,804,804,804,804, +804,804,804,804,804,804,804,804,804,804,804,804,804,804, 50, 50, +804,804,804,804,804, 50,804,804, 50, 50,804,804,804,804,804, 50, /* block 68 */ - 45, 45, 45, 45, 45, 45, 45, 45,747,748,747,748, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,749,749, 45, 45, 45, 45, - 50, 50, 45, 45, 45, 45, 45, 45, 47,750,751, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45,752,752,752,752,752,752,752,752,752,752, -752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752, -752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752, -752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752, -752,752,752,752,752,752,752,752,752,752,752, 45, 50, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45,806,807,806,807, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,808,808, 45, 45, 45, 45, + 50, 50, 45, 45, 45, 45, 45, 45, 47,809,810, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45,811,811,811,811,811,811,811,811,811,811, +811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,811, +811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,811, +811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,811, +811,811,811,811,811,811,811,811,811,811,811, 45, 50, 45, 45, 45, /* block 69 */ - 45, 45, 45, 45, 45, 45, 45, 45,753, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45,752, 45, 45, 45, 
45, 45, 50, 50, 50, 50, 50, + 45, 45, 45, 45, 45, 45, 45, 45,812, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45,811, 45, 45, 45, 45, 45, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50,744,744, 45,744, 45, 45, 45, 45, 45, 45, 45, 45, + 50, 50, 50, 50,802,802, 45,802, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 47, -744, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 50, 50, 50, 50, - 50, 50,744, 45, 45, 45, 45, 45, 45,749,749,749,749, 47, 47, 47, -749, 47, 47,749, 45, 45, 45, 45, 47, 47, 47, 45, 45, 45, 45, 45, +802, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 50, 50, 50, 50, + 50, 50,802, 45, 45, 45, 45, 45, 45,808,808,808,808, 47, 47, 47, +808, 47, 47,808, 45, 45, 45, 45, 47, 47, 47, 45, 45, 45, 45, 45, /* block 70 */ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45,754,754,754,754,754,754,754,754,754, -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,754,754,754,754,754, -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,813,813,813,813,813,813, +813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,813, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,813,813,813,813,813, +813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,813, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, /* block 71 */ - 58, 58, 58, 58, 58, 58, 58, 58, 54, 54, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,755,755,755,755,755,755,755,755,755,755, -755,755,756,755,755,755,755,755,755,755,755,755,755,755,755,755, 
-757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757, -757,757,757,757,757,757,757,757,757,757, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58,814,814,814,814,814,814,814,814, +814,814,814,814,814,814,814,814,814,814,814,814,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,815,815,815,815,815,815,815,815,815,815, +815,815,816,815,815,815,815,815,815,815,815,815,815,815,815,815, +817,817,817,817,817,817,817,817,817,817,817,817,817,817,817,817, +817,817,817,817,817,817,817,817,817,817, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, /* block 72 */ @@ -3132,2322 +3376,2422 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 78080 bytes, block = 128 */ /* block 73 */ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, -744,744, 45, 45, 45, 45, 45, 45, 45, 45, 47, 47, 45, 45,744,744, -744,744,744,744,744,744,743, 50, 45, 45, 45, 45,744,744,744,744, -743, 50, 45, 45, 45, 45,744,744, 45, 45,744,744, 45, 45, 45,744, -744,744,744,744, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45,744, 45,744, 45, 45,744,744,744,744,744,744, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 50, 50, 50,742,742,758,758, 50, +802,802, 45, 45, 45, 45, 45, 45, 45, 45, 47, 47, 45, 45,802,802, +802,802,802,802,802,802,801, 50, 45, 45, 45, 45,802,802,802,802, +801, 50, 45, 45, 45, 45,802,802, 45, 45,802,802, 45, 45, 45,802, +802,802,802,802, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45,802, 45,802, 45, 45,802,802,802,802,802,802, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 50, 50, 50,800,800,818,818, 50, /* block 74 */ - 47, 47, 47, 47, 47,759,744,753,753,753,753,753,753,753, 47,753, -753, 47,753, 45,749,749,753,753, 47,753,753,753,753,760,753,753, - 47,753, 47, 47,753,753, 47,753,753,753, 47,753,753,753, 47, 47, -753,753,753,753,753,753,753,753, 47, 47, 47,753,753,753,753,753, 
-743,753,743,753,753,753,753,753,749,749,749,749,749,749,749,749, -749,749,749,749,753,753,753,753,753,753,753,753,753,753,753, 47, -743,759,759,743,753, 47, 47,753, 47,753,753,753,753,759,759,761, -753,753,753,753,753,753,753,753,753,753,753, 47,753,753, 47,749, + 47, 47, 47, 47, 47,819,802,812,812,812,812,812,812,812, 47,812, +812, 47,812, 45,808,808,812,812, 47,812,812,812,812,820,812,812, + 47,812, 47, 47,812,812, 47,812,812,812, 47,812,812,812, 47, 47, +812,812,812,812,812,812,812,812, 47, 47, 47,812,812,812,812,812, +801,812,801,812,812,812,812,812,808,808,808,808,808,808,808,808, +808,808,808,808,812,812,812,812,812,812,812,812,812,812,812, 47, +801,819,819,801,812, 47, 47,812, 47,812,812,812,812,819,819,821, +812,812,812,812,812,812,812,812,812,812,812, 47,812,812, 47,808, /* block 75 */ -753,753,753,753,753,753, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, -753,753, 47,749, 47, 47, 47, 47,753, 47,753, 47, 47,753,753,753, - 47,749,753,753,753,753,753, 47,753,753,749,749,762,753,753,753, - 47, 47,753,753,753,753,753,753,753,753,753,753,753,749,749,753, -753,753,753,753,749,749,753,753, 47,753,753,753,753,753,749, 47, -753, 47,753, 47,749,753,753,753,753,753,753,753,753,753,753,753, -753,753,753,753,753,753,753,753,753, 47,749,753,753,753,753,753, - 47, 47,749,749, 47,749,753, 47, 47,760,749,753,753,749,753,753, +812,812,812,812,812,812, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, +812,812, 47,808, 47, 47, 47, 47,812, 47,812, 47, 47,812,812,812, + 47,808,812,812,812,812,812, 47,812,812,808,808,822,812,812,812, + 47, 47,812,812,812,812,812,812,812,812,812,812,812,808,808,812, +812,812,812,812,808,808,812,812, 47,812,812,812,812,812,808, 47, +812, 47,812, 47,808,812,812,812,812,812,812,812,812,812,812,812, +812,812,812,812,812,812,812,812,812, 47,808,812,812,812,812,812, + 47, 47,808,808, 47,808,812, 47, 47,820,808,812,812,808,812,812, /* block 76 */ -753,753, 47,753,753,749, 45, 45, 47, 47,763,763,760,760,753, 47, -753,753, 47, 45, 47, 45, 47, 45, 45, 45, 45, 45, 45, 
47, 45, 45, - 45, 47, 45, 45, 45, 45, 45, 45,749, 45, 45, 45, 45, 45, 45, 45, +812,812, 47,812,812,808, 45, 45, 47, 47,823,823,820,820,812, 47, +812,812, 47, 45, 47, 45, 47, 45, 45, 45, 45, 45, 45, 47, 45, 45, + 45, 47, 45, 45, 45, 45, 45, 45,808, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 47, 47, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 47, 45, 45, 47, 45, 45, 45, 45,749, 45,749, 45, - 45, 45, 45,749,749,749, 45,749, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 47, 47,753,753,753,704,705,704,705,704,705,704,705, -704,705,704,705,704,705, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 45, 45, 45, 45, 47, 45, 45, 47, 45, 45, 45, 45,808, 45,808, 45, + 45, 45, 45,808,808,808, 45,808, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 47, 47,812,812,812,758,759,758,759,758,759,758,759, +758,759,758,759,758,759, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, /* block 77 */ 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, - 58, 58, 58, 58, 45,749,749,749, 45, 45, 45, 45, 45, 45, 45, 45, + 58, 58, 58, 58, 45,808,808,808, 45, 45, 45, 45, 45, 45, 45, 45, 45, 47, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, -749, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,749, - 50, 50, 50,745,745,747,748, 50,745,745, 50,745, 50,745, 50, 50, - 50, 50, 50, 50, 50,745,745, 50, 50, 50, 50, 50,745,745,745, 50, - 50, 50,745,745,745,745,747,748,747,748,747,748,747,748,747,748, +808, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,808, + 50, 50, 50,804,804,806,807, 50,804,804, 50,804, 50,804, 50, 50, + 50, 50, 50, 50, 50,804,804, 50, 50, 50, 50, 50,804,804,804, 50, + 50, 50,804,804,804,804,806,807,806,807,806,807,806,807,806,807, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, /* blockblock 79 */ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50,742,742, 50, 50, 50, 50, 50, 50, 
50, 50, 50, 50, + 50, 50, 50, 50,800,800, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, /* block 80 */ - 50, 50, 50,747,748,747,748,747,748,747,748,747,748,747,748,747, -748,747,748,747,748,747,748,747,748, 50, 50,745, 50, 50, 50, 50, -745, 50, 50,745,745,745, 50, 50,745,745,745,745,745,745,745,745, - 50, 50, 50, 50, 50, 50, 50, 50,745, 50, 50, 50, 50, 50, 50, 50, -745,745, 50, 50,745,745, 50, 50, 50, 50, 50, 50, 50, 50, 50,745, -745,745,745, 50,745,745, 50, 50,747,748,747,748, 50, 50, 50, 50, - 50, 50, 50, 50, 50, 50, 50, 50,745,745, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50, 50,745, 50, 50,745,745, 50, 50,747,748, 50, 50, + 50, 50, 50,806,807,806,807,806,807,806,807,806,807,806,807,806, +807,806,807,806,807,806,807,806,807, 50, 50,804, 50, 50, 50, 50, +804, 50, 50,804,804,804, 50, 50,804,804,804,804,804,804,804,804, + 50, 50, 50, 50, 50, 50, 50, 50,804, 50, 50, 50, 50, 50, 50, 50, +804,804, 50, 50,804,804, 50, 50, 50, 50, 50, 50, 50, 50, 50,804, +804,804,804, 50,804,804, 50, 50,806,807,806,807, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50,804,804, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50,804, 50, 50,804,804, 50, 50,806,807, 50, 50, /* block 81 */ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,745,745,745,745, 50, - 50, 50, 50, 50,745,745, 50, 50, 50, 50, 50, 50,745,745, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,804,804,804,804, 50, + 50, 50, 50, 50,804,804, 50, 50, 50, 50, 50, 50,804,804, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50,745,745, 50, 50, 50, 50, 50, 50, 50, 
50, 50, 50, - 50, 50, 50, 50, 50, 50, 50, 50, 50,745,745,745,745,745,745,745, + 50, 50, 50, 50,804,804, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50,804,804,804,804,804,804,804, /* block 82 */ -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, -745,745,745, 50, 50, 50,745,745,745,745,745,745,745,745, 50,745, -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, -745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745, -745,745,745,745,745,745,745, 50, 50, 50, 50, 50, 50, 50,745, 50, - 50, 50, 50,745,745,745, 50, 50, 50, 50, 50, 50,745,745,745, 50, - 50, 50, 50, 50, 50, 50, 50,745,745,745,745, 50, 50, 50, 50, 50, +804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804, +804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804, +804,804,804, 50, 50, 50,804,804,804,804,804,804,804,804, 50,804, +804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804, +804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804, +804,804,804,804,804,804,804, 50, 50, 50, 50, 50, 50, 50,804, 50, + 50, 50, 50,804,804,804, 50, 50, 50, 50, 50, 50,804,804,804, 50, + 50, 50, 50, 50, 50, 50, 50,804,804,804,804, 50, 50, 50, 50, 50, /* block 83 */ 45, 45, 45, 45, 45, 47, 47, 47, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,749,749, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,808,808, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 45, 45, 50, 50, 50, 50, 50, 50, 45, 45, 45, -749, 45, 45, 45, 45,749, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, +808, 45, 45, 45, 45,808, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45,754,754, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45,813,813, 45, 45, 45, 45, 45, 45, 45, 
45, 45, 45, /* block 84 */ 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45,754, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45,813, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,765, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,825, 45, /* block 85 */ -766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766, -766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766, -766,766,766,766,766,766,766,766,766,766,766,766,766,766,766,766, -767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,767, -767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,767, -767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,767, - 65, 66,768,769,770,771,772, 65, 66, 65, 66, 65, 66,773,774,775, -776, 70, 65, 66, 70, 65, 66, 70, 70, 70, 70, 70,646,645,777,777, +826,826,826,826,826,826,826,826,826,826,826,826,826,826,826,826, +826,826,826,826,826,826,826,826,826,826,826,826,826,826,826,826, +826,826,826,826,826,826,826,826,826,826,826,826,826,826,826,826, +827,827,827,827,827,827,827,827,827,827,827,827,827,827,827,827, +827,827,827,827,827,827,827,827,827,827,827,827,827,827,827,827, +827,827,827,827,827,827,827,827,827,827,827,827,827,827,827,827, + 65, 66,828,829,830,831,832, 65, 66, 65, 66, 65, 66,833,834,835, +836, 70, 65, 66, 70, 65, 66, 70, 70, 70, 70, 70,696,695,837,837, /* block 86 */ -211,212,211,212,211,212,211,212,211,212,211,212,211,212,211,212, -211,212,211,212,211,212,211,212,211,212,211,212,211,212,211,212, -211,212,211,212,211,212,211,212,211,212,211,212,211,212,211,212, 
-211,212,211,212,211,212,211,212,211,212,211,212,211,212,211,212, -211,212,211,212,211,212,211,212,211,212,211,212,211,212,211,212, -211,212,211,212,211,212,211,212,211,212,211,212,211,212,211,212, -211,212,211,212,778,779,779,779,779,779,779,211,212,211,212,780, -780,780,211,212,163,163,163,163,163,781,781,781,781,782,781,781, +246,247,246,247,246,247,246,247,246,247,246,247,246,247,246,247, +246,247,246,247,246,247,246,247,246,247,246,247,246,247,246,247, +246,247,246,247,246,247,246,247,246,247,246,247,246,247,246,247, +246,247,246,247,246,247,246,247,246,247,246,247,246,247,246,247, +246,247,246,247,246,247,246,247,246,247,246,247,246,247,246,247, +246,247,246,247,246,247,246,247,246,247,246,247,246,247,246,247, +246,247,246,247,838,839,839,839,839,839,839,246,247,246,247,840, +840,840,246,247,196,196,196,196,196,841,841,841,842,843,842,842, /* block 87 */ -783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783, -783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783, -783,783,783,783,783,783,163,783,163,163,163,163,163,783,163,163, -784,784,784,784,784,784,784,784,784,784,784,784,784,784,784,784, -784,784,784,784,784,784,784,784,784,784,784,784,784,784,784,784, -784,784,784,784,784,784,784,784,784,784,784,784,784,784,784,784, -784,784,784,784,784,784,784,784,163,163,163,163,163,163,163,785, -786,163,163,163,163,163,163,163,163,163,163,163,163,163,163,787, +844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844, +844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844, +844,844,844,844,844,844,196,844,196,196,196,196,196,844,196,196, +845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845, +845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845, +845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845, +845,845,845,845,845,845,845,845,196,196,196,196,196,196,196,846, +847,196,196,196,196,196,196,196,196,196,196,196,196,196,196,848, /* block 88 */ 
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484, -484,484,484,484,484,484,484,163,163,163,163,163,163,163,163,163, -484,484,484,484,484,484,484,163,484,484,484,484,484,484,484,163, -484,484,484,484,484,484,484,163,484,484,484,484,484,484,484,163, -484,484,484,484,484,484,484,163,484,484,484,484,484,484,484,163, -484,484,484,484,484,484,484,163,484,484,484,484,484,484,484,163, -788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,788, -788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,788, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533, +533,533,533,533,533,533,533,196,196,196,196,196,196,196,196,196, +533,533,533,533,533,533,533,196,533,533,533,533,533,533,533,196, +533,533,533,533,533,533,533,196,533,533,533,533,533,533,533,196, +533,533,533,533,533,533,533,196,533,533,533,533,533,533,533,196, +533,533,533,533,533,533,533,196,533,533,533,533,533,533,533,196, +849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849, +849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849, /* block 89 */ - 43, 43,789,790,789,790, 43, 43, 43,789,790, 43,789,790, 43, 43, - 43, 43, 43, 43, 43, 43, 43,681, 43, 43,681, 43,789,790, 43, 43, -789,790,704,705,704,705,704,705,704,705, 43, 43, 43, 43,700,791, - 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,681,681,700, 43, 43, 43, -681,792,685,793, 43, 43, 43, 43, 43, 43, 43, 43,792, 43,792,792, - 45, 45, 43,700,700,704,705,704,705,704,705,704,705,681,754,754, -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, -754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754, + 43, 43,850,851,850,851, 43, 43, 43,850,851, 43,850,851, 43, 43, + 43, 43, 43, 43, 43, 43, 43,852, 43, 43,734, 43,850,851, 43, 43, +850,851,758,759,758,759,758,759,758,759, 43, 43, 43, 43,754,853, +854,855, 43, 43, 43, 43, 43, 43, 43, 43,734,734,856, 43, 43, 43, +734,857,738,858, 43, 43, 43, 43, 43, 43, 43, 43,859, 43,859,859, + 45, 45, 43,754,754,758,759,758,759,758,759,758,759,734,813,813, 
+813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,813, +813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,813, /* block 90 */ -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,163,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,163,163,163,163,163,163,163,163,163,163,163,163, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,196,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,196,196,196,196,196,196,196,196,196,196,196,196, /* blockblock 92 */ -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,794,794,794,794,794,794,794,794,794,794, -794,794,794,794,794,794,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -795,795,796,796,795,795,795,795,795,795,795,795,163,163,163,163, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, 
+860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860, +860,860,860,860,860,860,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +861,861,862,862,861,861,861,861,861,861,861,861,861,861,863,863, /* block 93 */ -676,797,798,799,724,800,801,802,803,804,803,804,805,806,805,806, -803,804, 45,807,803,804,803,804,803,804,803,804,808,809,810,810, - 45,802,802,802,802,802,802,802,802,802,811,811,811,811,812,812, -813,814,814,814,814,814,724,815,802,802,802,816,817,818,819,819, -163,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, +729,864,865,866,781,867,868,869,870,871,872,873,874,875,874,875, +876,877, 45,878,876,877,876,877,876,877,876,877,879,880,881,881, + 45,869,869,869,869,869,869,869,869,869,882,882,882,882,883,883, +884,885,885,885,885,885,781,886,869,869,869,887,888,889,890,890, +196,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, /* block 94 */ -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,163,163,821,821,822,822,823,823,820, -824,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, -825,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, -825,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, -825,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, -825,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, -825,825,825,825,825,825,825,825,825,825,825,826,827,828,828,825, 
+891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,196,196,892,892,893,893,894,894,891, +895,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,897,898,899,899,896, /* block 95 */ -163,163,163,163,163,829,829,829,829,829,829,829,829,829,829,829, -829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829, -829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829, -163,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830, -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830, -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830, -830,830,830,830,831,830,830,830,830,830,830,830,830,830,830,830, -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830, +196,196,196,196,196,900,900,900,900,900,900,900,900,900,900,900, +900,900,900,900,900,900,900,900,900,900,900,900,900,900,900,900, +900,900,900,900,900,900,900,900,900,900,900,900,900,900,900,900, +196,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901, +901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901, +901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901, +901,901,901,901,902,901,901,901,901,901,901,901,901,901,901,901, +901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901, /* block 96 */ -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,163, -832,832,833,833,833,833,832,832,832,832,832,832,832,832,832,832, -829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829, -829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829, -819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819, 
-819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819, -819,819,819,819,163,163,163,163,163,163,163,163,163,163,163,163, -825,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, +901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,196, +903,903,904,904,904,904,903,903,903,903,903,903,903,903,903,903, +900,900,900,900,900,900,900,900,900,900,900,900,900,900,900,900, +900,900,900,900,900,900,900,900,900,900,900,900,900,900,900,900, +890,890,890,890,890,890,890,890,890,890,890,890,890,890,890,890, +890,890,890,890,890,890,890,890,890,890,890,890,890,890,890,890, +890,890,890,890,890,890,196,196,196,196,196,196,196,196,196,861, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, /* block 97 */ -834,834,834,834,834,834,834,834,834,834,834,834,834,834,834,834, -834,834,834,834,834,834,834,834,834,834,834,834,834,835,835,163, -833,833,833,833,833,833,833,833,833,833,832,832,832,832,832,832, -832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,832, -832,832,832,832,832,832,832,832,836,836,836,836,836,836,836,836, -724, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, -834,834,834,834,834,834,834,834,834,834,834,834,834,834,834,834, -834,834,834,834,834,834,834,834,834,834,834,834,835,835,835,461, +905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,905, +905,905,905,905,905,905,905,905,905,905,905,905,905,906,906,196, +904,904,904,904,904,904,904,904,904,904,903,903,903,903,903,903, +903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903, +903,903,903,903,903,903,903,903,907,907,907,907,907,907,907,907, +781, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, +905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,905, +905,905,905,905,905,905,905,905,905,905,905,905,906,906,906,510, /* block 98 */ -833,833,833,833,833,833,833,833,833,833,832,832,832,832,832,832, -832,832,832,832,832,832,832,837,832,837,832,832,832,832,832,832, 
-832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,832, -832, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, -832,832,832,832,832,832,832,832,832,832,832,832,724,724,724,724, -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838, -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838, -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,832, +904,904,904,904,904,904,904,904,904,904,903,903,903,903,903,903, +903,903,903,903,903,903,903,908,903,908,903,903,903,903,903,903, +903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903, +903, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, +903,903,903,903,903,903,903,903,903,903,903,903,781,781,781,781, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,903, /* block 99 */ -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838, -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838, -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838, -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838, -838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838, -838,838,838,838,838,838,838,838,832,832,832,832,832,832,832,832, -832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,832, -832,461,461,461,461,461,461,724,724,724,724,832,832,832,832,832, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, +909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909, +909,909,909,909,909,909,909,909,903,903,903,903,903,903,903,903, +903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903, +903,510,510,510,510,510,510,781,781,781,781,903,903,903,903,903, /* 
block 100 */ -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,724,724, -832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,832, -832,832,832,832,832,832,832,832,832,832,832,832,832,832,832,724, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,781,781, +903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903, +903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,781, /* block 101 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, 
+910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, /* block 102 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, /* blockblock 104 */ -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, -840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, 
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, +911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911, /* block 105 */ -840,840,840,840,840,840,840,840,840,840,840,840,840,163,163,163, -842,842,842,842,842,842,842,842,842,842,842,842,842,842,842,842, -842,842,842,842,842,842,842,842,842,842,842,842,842,842,842,842, -842,842,842,842,842,842,842,842,842,842,842,842,842,842,842,842, -842,842,842,842,842,842,842,163,163,163,163,163,163,163,163,163, -843,843,843,843,843,843,843,843,843,843,843,843,843,843,843,843, -843,843,843,843,843,843,843,843,843,843,843,843,843,843,843,843, -843,843,843,843,843,843,843,843,844,844,844,844,844,844,845,846, +911,911,911,911,911,911,911,911,911,911,911,911,911,196,196,196, +913,913,913,913,913,913,913,913,913,913,913,913,913,913,913,913, +913,913,913,913,913,913,913,913,913,913,913,913,913,913,913,913, +913,913,913,913,913,913,913,913,913,913,913,913,913,913,913,913, +913,913,913,913,913,913,913,196,196,196,196,196,196,196,196,196, +914,914,914,914,914,914,914,914,914,914,914,914,914,914,914,914, +914,914,914,914,914,914,914,914,914,914,914,914,914,914,914,914, +914,914,914,914,914,914,914,914,915,915,915,915,915,915,916,917, /* block 106 */ -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, 
-847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, /* block 107 */ -847,847,847,847,847,847,847,847,847,847,847,847,848,849,850,850, -847,847,847,847,847,847,847,847,847,847,847,847,847,847,847,847, -851,851,851,851,851,851,851,851,851,851,847,847,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -240,241,240,241,240,241,240,241,240,241,852,853,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,240,241,854,246, -248,248,248,855,788,788,788,788,788,788,788,788,856,856,855,857, +918,918,918,918,918,918,918,918,918,918,918,918,919,920,921,921, +918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918, +922,922,922,922,922,922,922,922,922,922,918,918,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +275,276,275,276,275,276,275,276,275,276,923,924,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,925,281, +283,283,283,926,849,849,849,849,849,849,849,849,927,927,926,928, /* block 108 */ -240,241,240,241,240,241,240,241,240,241,240,241,240,241,240,241, -240,241,240,241,240,241,240,241,240,241,240,241,858,858,788,788, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, 
-859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,860,860,860,860,860,860,860,860,860,860, -861,861,862,863,864,864,864,863,163,163,163,163,163,163,163,163, +275,276,275,276,275,276,275,276,275,276,275,276,275,276,275,276, +275,276,275,276,275,276,275,276,275,276,275,276,929,929,849,849, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,931,931,931,931,931,931,931,931,931,931, +932,932,933,934,935,935,935,934,196,196,196,196,196,196,196,196, /* block 109 */ -865,865,865,865,865,865,865,865, 46, 46, 46, 46, 46, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 46,149,149,149,149,149,149,149,149,149, +936,936,936,936,936,936,936,936, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46,151,151,151,151,151,151,151,151,151, 46, 46, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 70, 70, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, -645, 70, 70, 70, 70, 70, 70, 70, 70, 65, 66, 65, 66,866, 65, 66, +695, 70, 70, 70, 70, 70, 70, 70, 70, 65, 66, 65, 66,937, 65, 66, /* block 110 */ - 65, 66, 65, 66, 65, 66, 65, 66,149,867,867, 65, 66,868, 70, 92, - 65, 66, 65, 66,869, 70, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, - 65, 66, 65, 66, 65, 66, 65, 66, 65, 66,870,871,872,873,870, 70, -874,875,876,877, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, - 65, 66, 65, 66,878,879,880, 65, 66, 65, 66,163,163,163,163,163, - 65, 66,163, 70,163, 70, 65, 66, 65, 66,163,163,163,163,163,163, 
-163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,645,645,645, 65, 66, 92,147,147, 70, 92, 92, 92, 92, 92, + 65, 66, 65, 66, 65, 66, 65, 66,151,938,938, 65, 66,939, 70, 93, + 65, 66, 65, 66,940, 70, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, + 65, 66, 65, 66, 65, 66, 65, 66, 65, 66,941,942,943,944,941, 70, +945,946,947,948, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, 65, 66, + 65, 66, 65, 66,949,950,951, 65, 66, 65, 66,952, 65, 66,196,196, + 65, 66,196, 70,196, 70, 65, 66, 65, 66, 65, 66,953,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,695,695,695, 65, 66, 93,149,149, 70, 93, 93, 93, 93, 93, /* block 111 */ -881,881,882,881,881,881,883,881,881,881,881,882,881,881,881,881, -881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881, -881,881,881,884,884,882,882,884,885,885,885,885,883,163,163,163, -886,886,886,887,887,887,888,888,889,890,163,163,163,163,163,163, -891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, -891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, -891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, -891,891,891,891,892,892,893,893,163,163,163,163,163,163,163,163, +954,954,955,954,954,954,956,954,954,954,954,955,954,954,954,954, +954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954, +954,954,954,957,957,955,955,957,958,958,958,958,956,196,196,196, +959,959,959,960,960,960,961,961,962,963,196,196,196,196,196,196, +964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,964, +964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,964, +964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,964, +964,964,964,964,965,965,966,966,196,196,196,196,196,196,196,196, /* block 112 */ -894,894,895,895,895,895,895,895,895,895,895,895,895,895,895,895, -895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,895, -895,895,895,895,895,895,895,895,895,895,895,895,895,895,895,895, 
-895,895,895,895,894,894,894,894,894,894,894,894,894,894,894,894, -894,894,894,894,896,897,163,163,163,163,163,163,163,163,898,898, -899,899,899,899,899,899,899,899,899,899,163,163,163,163,163,163, -336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336, -336,900,335,901,335,335,335,335,343,343,343,335,343,335,335,333, +967,967,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,968,968,968,968,968,968,968,968,968,968,968,968, +968,968,968,968,967,967,967,967,967,967,967,967,967,967,967,967, +967,967,967,967,969,970,196,196,196,196,196,196,196,196,971,971, +972,972,972,972,972,972,972,972,972,972,196,196,196,196,196,196, +378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378, +378,973,376,974,376,376,376,376,385,385,385,376,385,376,376,374, /* block 113 */ -902,902,902,902,902,902,902,902,902,902,903,903,903,903,903,903, -903,903,903,903,903,903,903,903,903,903,903,903,903,903,903,903, -903,903,903,903,903,903,904,904,904,904,904,905,905,905,906,907, -908,908,908,908,908,908,908,908,908,908,908,908,908,908,908,908, -908,908,908,908,908,908,908,909,909,909,909,909,909,909,909,909, -909,909,910,911,163,163,163,163,163,163,163,163,163,163,163,912, -479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479, -479,479,479,479,479,479,479,479,479,479,479,479,479,163,163,163, +975,975,975,975,975,975,975,975,975,975,976,976,976,976,976,976, +976,976,976,976,976,976,976,976,976,976,976,976,976,976,976,976, +976,976,976,976,976,976,977,977,977,977,977,978,978,978,979,980, +981,981,981,981,981,981,981,981,981,981,981,981,981,981,981,981, +981,981,981,981,981,981,981,982,982,982,982,982,982,982,982,982, +982,982,983,984,196,196,196,196,196,196,196,196,196,196,196,985, +528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528, +528,528,528,528,528,528,528,528,528,528,528,528,528,196,196,196, /* block 114 */ 
-913,913,913,914,915,915,915,915,915,915,915,915,915,915,915,915, -915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915, -915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915, -915,915,915,916,914,914,913,913,913,913,914,914,913,913,914,914, -917,918,918,918,918,918,918,919,920,920,918,918,918,918,163,921, -922,922,922,922,922,922,922,922,922,922,163,163,163,163,918,918, -462,462,462,462,462,472,923,462,462,462,462,462,462,462,462,462, -473,473,473,473,473,473,473,473,473,473,462,462,462,462,462,163, +986,986,986,987,988,988,988,988,988,988,988,988,988,988,988,988, +988,988,988,988,988,988,988,988,988,988,988,988,988,988,988,988, +988,988,988,988,988,988,988,988,988,988,988,988,988,988,988,988, +988,988,988,989,987,987,986,986,986,986,987,987,986,986,987,987, +990,991,991,991,991,991,991,992,993,993,991,991,991,991,196,994, +995,995,995,995,995,995,995,995,995,995,196,196,196,196,991,991, +511,511,511,511,511,521,996,511,511,511,511,511,511,511,511,511, +522,522,522,522,522,522,522,522,522,522,511,511,511,511,511,196, /* block 115 */ -924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, -924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924, -924,924,924,924,924,924,924,924,924,925,925,925,925,925,925,926, -926,925,925,926,926,925,925,163,163,163,163,163,163,163,163,163, -924,924,924,925,924,924,924,924,924,924,924,924,925,926,163,163, -927,927,927,927,927,927,927,927,927,927,163,163,928,929,929,929, -462,462,462,462,462,462,462,462,462,462,462,462,462,462,462,462, -923,462,462,462,462,462,462,474,474,474,462,471,472,471,462,462, +997,997,997,997,997,997,997,997,997,997,997,997,997,997,997,997, +997,997,997,997,997,997,997,997,997,997,997,997,997,997,997,997, +997,997,997,997,997,997,997,997,997,998,998,998,998,998,998,999, +999,998,998,999,999,998,998,196,196,196,196,196,196,196,196,196, +997,997,997,998,997,997,997,997,997,997,997,997,998,999,196,196, 
+1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,196,196,1001,1002,1002,1002, +511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511, +996,511,511,511,511,511,511,523,523,523,511,520,521,520,511,511, /* block 116 */ -930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, -930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, -930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, -931,930,931,931,931,932,932,931,931,932,930,932,932,930,931,933, -934,933,934,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,930,930,935,936,937, -938,938,938,938,938,938,938,938,938,938,938,939,940,940,939,939, -941,941,938,942,942,939,943,163,163,163,163,163,163,163,163,163, +1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003, +1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003, +1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003, +1004,1003,1004,1004,1004,1005,1005,1004,1004,1005,1003,1005,1005,1003,1004,1006, +1007,1006,1007,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,1003,1003,1008,1009,1010, +1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1012,1013,1013,1012,1012, +1014,1014,1011,1015,1015,1012,1016,196,196,196,196,196,196,196,196,196, /* block 117 */ -163,484,484,484,484,484,484,163,163,484,484,484,484,484,484,163, -163,484,484,484,484,484,484,163,163,163,163,163,163,163,163,163, -484,484,484,484,484,484,484,163,484,484,484,484,484,484,484,163, +196,533,533,533,533,533,533,196,196,533,533,533,533,533,533,196, +196,533,533,533,533,533,533,196,196,196,196,196,196,196,196,196, +533,533,533,533,533,533,533,196,533,533,533,533,533,533,533,196, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, - 70, 70, 70,944, 70, 70, 70, 70, 70, 70, 70,867,147,147,147,147, - 70, 70, 70, 
70, 70,221, 70, 70, 70,147, 46, 46,163,163,163,163, -945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945, + 70, 70, 70,1017, 70, 70, 70, 70, 70, 70, 70,938,149,149,149,149, + 70, 70, 70, 70, 70,256, 70, 70, 70,149, 46, 46,196,196,196,196, +1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018, /* block 118 */ -945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945, -945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945, -945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945, -945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938, -938,938,938,939,939,940,939,939,940,939,939,941,946,943,163,163, -947,947,947,947,947,947,947,947,947,947,163,163,163,163,163,163, +1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018, +1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018, +1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018, +1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018, +1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011, +1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011, +1011,1011,1011,1012,1012,1013,1012,1012,1013,1012,1012,1014,1019,1016,196,196, +1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,196,196,196,196,196,196, /* block 119 */ -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, 
-949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, /* block 120 */ -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, /* block 
121 */ -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, /* block 122 */ -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, 
+1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, /* block 123 */ -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, /* block 124 */ -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, 
-949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, /* block 125 */ -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,948,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,948,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, 
+1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, /* block 126 */ -949,949,949,949,949,949,949,949,948,949,949,949,949,949,949,949, -949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949, -949,949,949,949,163,163,163,163,163,163,163,163,163,163,163,163, -482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482, -482,482,482,482,482,482,482,163,163,163,163,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, -483,483,483,483,483,483,483,483,483,483,483,483,163,163,163,163, +1022,1022,1022,1022,1022,1022,1022,1022,1021,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022, +1022,1022,1022,1022,196,196,196,196,196,196,196,196,196,196,196,196, +531,531,531,531,531,531,531,531,531,531,531,531,531,531,531,531, +531,531,531,531,531,531,531,196,196,196,196,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,532,532,532,532, +532,532,532,532,532,532,532,532,532,532,532,532,196,196,196,196, /* block 127 */ -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, -950,950,950,950,950,950,950,950,950,950,950,950,950,950,950,950, +1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, +1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, +1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 
+1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, +1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, +1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, +1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, +1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* block 128 */ -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, +1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, /* block 129 */ -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, 
-952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, /* block 130 */ -952,952,952,952,952,952,952,952,952,952,952,952,952,952,839,839, -952,839,952,839,839,952,952,952,952,952,952,952,952,952,952,839, -952,839,952,839,839,952,952,839,839,839,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,163,163, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,910,910, +1025,910,1025,910,910,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,910, +1025,910,1025,910,910,1025,1025,910,910,910,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,196,196, 
+1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, /* block 131 */ -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 132 */ -653,653,653,653,653,653,653,163,163,163,163,163,163,163,163,163, -163,163,163,257,257,257,257,257,163,163,163,163,163,270,265,270, -270,270,270,270,270,270,270,270,270,953,270,270,270,270,270,270, -270,270,270,270,270,270,270,262,270,270,270,270,270,262,270,262, -270,270,262,270,270,262,270,270,270,270,270,270,270,270,270,270, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, +703,703,703,703,703,1026,1027,196,196,196,196,196,196,196,196,196, +196,196,196,292,292,292,292,292,196,196,196,196,196,305,300,305, +305,305,305,305,305,305,305,305,305,1028,305,305,305,305,305,305, 
+305,305,305,305,305,305,305,297,305,305,305,305,305,297,305,297, +305,305,297,305,305,297,305,305,305,305,305,305,305,305,305,305, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, /* block 133 */ -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,331,331,331,331,331,331,331,331,331,331,331,331,331,331, -331,331,331,302,302,302,302,302,302,302,302,302,302,302,302,302, -302,302,302,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,370,370,370,370,370,370,370,370,370,370,370,370,370,370, +370,370,370,340,340,340,340,340,340,340,340,340,340,340,340,340, +340,340,340,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, /* block 134 */ -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,954,954, -954,954,954,954,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, 
+321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,1029,1029, +1029,1029,1029,1029,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, /* block 135 */ -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, /* block 136 */ -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,955,956, -280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280, 
-286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,1030,1031, +315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, /* block 137 */ -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -302,302,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,302,302,302,302,302,302,302,280, -957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, -957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957, -286,286,958,286,286,286,286,286,286,286,954,954,277,959,280,280, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +340,340,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,340,340,340,340,340,340,340,315, +1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032, +1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032, +321,321,1033,321,321,321,321,321,321,321,1029,1029,312,1034,315,315, /* block 138 */ -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,961, -962,962,962,963,962,962,962,964,965,962,163,163,163,163,163,163, 
-154,154,154,154,154,154,154,154,154,154,154,154,154,154,856,856, -962,966,966,701,701,964,965,964,965,964,965,964,965,964,965,964, -965,967,968,967,968,799,799,964,965,962,962,962,962,701,701,701, -969,166,970,163,166,971,972,972,966,973,974,973,974,973,974,975, -962,976,714,977,978,978,716,163,976,430,975,962,163,163,163,163, -954,286,954,286,954,302,954,286,954,286,954,286,954,286,954,286, +1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1036, +1037,1037,1038,1039,1037,1038,1038,1040,1041,1037,196,196,196,196,196,196, +176,176,176,176,176,176,176,176,176,176,176,176,176,176,927,927, +1037,1042,1042,755,755,1040,1041,1040,1041,1040,1041,1040,1041,1040,1041,1040, +1041,1043,1044,1043,1044,866,866,1040,1041,1037,1037,1037,1037,755,755,755, +1045,199,1046,196,199,1047,1038,1038,1042,1048,1049,1048,1049,1048,1049,1050, +1037,1051,1052,1053,1054,1054,794,196,1051,479,1050,1037,196,196,196,196, +1029,321,1029,321,1029,340,1029,321,1029,321,1029,321,1029,321,1029,321, /* block 139 */ -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,286,286,286, -286,286,286,286,286,286,286,286,286,286,286,286,286,302,302, 51, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, 
+321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321, +321,321,321,321,321,321,321,321,321,321,321,321,321,340,340, 51, /* block 140 */ -163,972,979,975,430,975,962,980,973,974,962,714,969,981,970,982, -983,983,983,983,983,983,983,983,983,983,971,166,978,716,978,972, -962,984,984,984,984,984,984, 59, 59, 59, 59, 59, 59, 59, 59, 59, - 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,973,976,974,985,701, - 46,986,986,986,986,986,986, 62, 62, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,973,716,974,716,973, -974,987,988,989,990,826,825,825,825,825,825,825,825,825,825,825, -827,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, +196,1038,1055,1050,479,1050,1037,1056,1048,1049,1037,1052,1045,1057,1046,1058, +1059,1059,1059,1059,1059,1059,1059,1059,1059,1059,1047,199,1054,794,1054,1038, +1037,1060,1060,1060,1060,1060,1060, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,1048,1051,1049,1061,755, + 46,1062,1062,1062,1062,1062,1062, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,1048,794,1049,794,1048, +1049,1063,1064,1065,1066,897,896,896,896,896,896,896,896,896,896,896, +898,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, /* block 141 */ -825,825,825,825,825,825,825,825,825,825,825,825,825,825,825,825, -825,825,825,825,825,825,825,825,825,825,825,825,825,825,991,991, -831,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830, -830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,163, -163,163,830,830,830,830,830,830,163,163,830,830,830,830,830,830, -163,163,830,830,830,830,830,830,163,163,830,830,830,163,163,163, -430,430,716, 46,724,430,430,163,724,716,716,716,716,724,724,163, -708,708,708,708,708,708,708,708,708,992,992,992,724,724,957,957, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896, +896,896,896,896,896,896,896,896,896,896,896,896,896,896,1067,1067, 
+902,901,901,901,901,901,901,901,901,901,901,901,901,901,901,901, +901,901,901,901,901,901,901,901,901,901,901,901,901,901,901,196, +196,196,901,901,901,901,901,901,196,196,901,901,901,901,901,901, +196,196,901,901,901,901,901,901,196,196,901,901,901,196,196,196, +479,479,794, 46,781,479,479,196,781,794,794,794,794,781,781,196, +765,765,765,765,765,765,765,765,765,1068,1068,1068,781,781,1032,1032, /* block 142 */ -993,993,993,993,993,993,993,993,993,993,993,993,163,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,163,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,163,993,993,163,993, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,163,163, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,196,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,196,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,196,1069,1069,196,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,196,196, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 143 */ -993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, 
-993,993,993,993,993,993,993,993,993,993,993,993,993,993,993,993, -993,993,993,993,993,993,993,993,993,993,993,163,163,163,163,163, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069, +1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,1069,196,196,196,196,196, /* block 144 */ -994,995,996,163,163,163,163,997,997,997,997,997,997,997,997,997, -997,997,997,997,997,997,997,997,997,997,997,997,997,997,997,997, -997,997,997,997,997,997,997,997,997,997,997,997,997,997,997,997, -997,997,997,997,163,163,163,998,998,998,998,998,998,998,998,998, -999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999, -999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999, -999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999, -999,999,999,999,999,1000,1000,1000,1000,1001,1001,1001,1001,1001,1001,1001, +1070,1071,1072,196,196,196,196,1073,1073,1073,1073,1073,1073,1073,1073,1073, +1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073, +1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073, +1073,1073,1073,1073,196,196,196,1074,1074,1074,1074,1074,1074,1074,1074,1074, +1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075, +1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075, +1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075, +1075,1075,1075,1075,1075,1076,1076,1076,1076,1077,1077,1077,1077,1077,1077,1077, /* block 145 
*/ -1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1000,1000,1001,1002,1002,163, -724,724,724,724,724,724,724,724,724,724,724,724,724,163,163,163, -1001,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,158,163,163, +1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1076,1076,1077,1078,1078,196, +781,781,781,781,781,781,781,781,781,781,781,781,781,196,196,196, +1077,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,189,196,196, /* block 146 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 147 */ -1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003, -1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,1003,163,163,163, -1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004, -1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004, -1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004,1004, -1004,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1005,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006, -1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,1006,163,163,163,163, +1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079, +1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,1079,196,196,196, +1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080, +1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080, +1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080, +1080,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1081,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082, +1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,196,196,196,196, /* block 148 */ -1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007, -1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007,1007, -1008,1008,1008,1008,163,163,163,163,163,163,163,163,163,1007,1007,1007, -1009,1009,1009,1009,1009,1009,1009,1009,1009,1009,1009,1009,1009,1009,1009,1009, -1009,1010,1009,1009,1009,1009,1009,1009,1009,1009,1010,163,163,163,163,163, -1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011, 
-1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011,1011, -1011,1011,1011,1011,1011,1011,1012,1012,1012,1012,1012,163,163,163,163,163, +1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083, +1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083,1083, +1084,1084,1084,1084,196,196,196,196,196,196,196,196,196,1083,1083,1083, +1085,1085,1085,1085,1085,1085,1085,1085,1085,1085,1085,1085,1085,1085,1085,1085, +1085,1086,1085,1085,1085,1085,1085,1085,1085,1085,1086,196,196,196,196,196, +1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087, +1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,1087, +1087,1087,1087,1087,1087,1087,1088,1088,1088,1088,1088,196,196,196,196,196, /* block 149 */ -1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013, -1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,1013,163,1014, -1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015, -1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015,1015, -1015,1015,1015,1015,163,163,163,163,1015,1015,1015,1015,1015,1015,1015,1015, -1016,1017,1017,1017,1017,1017,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089, +1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,196,1090, +1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091, +1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091,1091, +1091,1091,1091,1091,196,196,196,196,1091,1091,1091,1091,1091,1091,1091,1091, +1092,1093,1093,1093,1093,1093,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 150 */ -1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018, -1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018,1018, -1018,1018,1018,1018,1018,1018,1018,1018,1019,1019,1019,1019,1019,1019,1019,1019, -1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019, -1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019, -1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020, -1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020, -1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020, +1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094, +1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094, +1094,1094,1094,1094,1094,1094,1094,1094,1095,1095,1095,1095,1095,1095,1095,1095, +1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095, +1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095, +1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096, +1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096, +1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096,1096, /* block 151 */ -1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021, -1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,1021,163,163, -1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,163,163,163,163,163,163, -1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, -1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, -1023,1023,1023,1023,163,163,163,163,1024,1024,1024,1024,1024,1024,1024,1024, -1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024, 
-1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,1024,163,163,163,163, +1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097, +1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,1097,196,196, +1098,1098,1098,1098,1098,1098,1098,1098,1098,1098,196,196,196,196,196,196, +1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099, +1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099,1099, +1099,1099,1099,1099,196,196,196,196,1100,1100,1100,1100,1100,1100,1100,1100, +1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100, +1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,1100,196,196,196,196, /* block 152 */ -1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, -1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, -1025,1025,1025,1025,1025,1025,1025,1025,163,163,163,163,163,163,163,163, -1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026, -1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026, -1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026,1026, -1026,1026,1026,1026,163,163,163,163,163,163,163,163,163,163,163,1027, -1028,1028,1028,1028,1028,1028,1028,1028,1028,1028,1028,163,1028,1028,1028,1028, +1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101, +1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101, +1101,1101,1101,1101,1101,1101,1101,1101,196,196,196,196,196,196,196,196, +1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102, +1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102, +1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102,1102, +1102,1102,1102,1102,196,196,196,196,196,196,196,196,196,196,196,1103, 
+1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,196,1104,1104,1104,1104, /* block 153 */ -1028,1028,1028,1028,1028,1028,1028,1028,1028,1028,1028,163,1028,1028,1028,1028, -1028,1028,1028,163,1028,1028,163,1029,1029,1029,1029,1029,1029,1029,1029,1029, -1029,1029,163,1029,1029,1029,1029,1029,1029,1029,1029,1029,1029,1029,1029,1029, -1029,1029,163,1029,1029,1029,1029,1029,1029,1029,163,1029,1029,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,196,1104,1104,1104,1104, +1104,1104,1104,196,1104,1104,196,1105,1105,1105,1105,1105,1105,1105,1105,1105, +1105,1105,196,1105,1105,1105,1105,1105,1105,1105,1105,1105,1105,1105,1105,1105, +1105,1105,196,1105,1105,1105,1105,1105,1105,1105,196,1105,1105,196,196,196, +1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106, +1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106, +1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106, +1106,1106,1106,1106,196,196,196,196,196,196,196,196,196,196,196,196, /* blockblock 155 */ -1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030, -1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030, -1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030, -1030,1030,1030,1030,1030,1030,1030,163,163,163,163,163,163,163,163,163, -1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030,1030, -1030,1030,1030,1030,1030,1030,163,163,163,163,163,163,163,163,163,163, -1030,1030,1030,1030,1030,1030,1030,1030,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, 
+1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107, +1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107, +1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107, +1107,1107,1107,1107,1107,1107,1107,196,196,196,196,196,196,196,196,196, +1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107,1107, +1107,1107,1107,1107,1107,1107,196,196,196,196,196,196,196,196,196,196, +1107,1107,1107,1107,1107,1107,1107,1107,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 156 */ -147,1031,1031,147,147,147,163,147,147,147,147,147,147,147,147,147, -147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147, -147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147, -147,163,147,147,147,147,147,147,147,147,147,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +149,1108,1108,149,149,149,196,149,149,149,149,149,149,149,149,149, +149,149,149,149,149,149,149,149,149,149,149,149,149,149,149,149, +149,149,149,149,149,149,149,149,149,149,149,149,149,149,149,149, +149,196,149,149,149,149,149,149,149,149,149,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 157 */ -1032,1032,1032,1032,1032,1032,262,262,1032,262,1032,1032,1032,1032,1032,1032, -1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032, -1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032,1032, 
-1032,1032,1032,1032,1032,1032,262,1032,1032,262,262,262,1032,262,262,1032, -1033,1033,1033,1033,1033,1033,1033,1033,1033,1033,1033,1033,1033,1033,1033,1033, -1033,1033,1033,1033,1033,1033,262,1034,1035,1035,1035,1035,1035,1035,1035,1035, -1036,1036,1036,1036,1036,1036,1036,1036,1036,1036,1036,1036,1036,1036,1036,1036, -1036,1036,1036,1036,1036,1036,1036,1037,1037,1038,1038,1038,1038,1038,1038,1038, +1109,1109,1109,1109,1109,1109,297,297,1109,297,1109,1109,1109,1109,1109,1109, +1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109, +1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109,1109, +1109,1109,1109,1109,1109,1109,297,1109,1109,297,297,297,1109,297,297,1109, +1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110, +1110,1110,1110,1110,1110,1110,297,1111,1112,1112,1112,1112,1112,1112,1112,1112, +1113,1113,1113,1113,1113,1113,1113,1113,1113,1113,1113,1113,1113,1113,1113,1113, +1113,1113,1113,1113,1113,1113,1113,1114,1114,1115,1115,1115,1115,1115,1115,1115, /* block 158 */ -1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039, -1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,1039,262, -262,262,262,262,262,262,262,1040,1040,1040,1040,1040,1040,1040,1040,1040, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -1041,1041,1041,1041,1041,1041,1041,1041,1041,1041,1041,1041,1041,1041,1041,1041, -1041,1041,1041,262,1041,1041,262,262,262,262,262,1042,1042,1042,1042,1042, +1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116, +1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,1116,297, +297,297,297,297,297,297,297,1117,1117,1117,1117,1117,1117,1117,1117,1117, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, 
+297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +1118,1118,1118,1118,1118,1118,1118,1118,1118,1118,1118,1118,1118,1118,1118,1118, +1118,1118,1118,297,1118,1118,297,297,297,297,297,1119,1119,1119,1119,1119, /* block 159 */ -1043,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043,1043, -1043,1043,1043,1043,1043,1043,1044,1044,1044,1044,1044,1044,262,262,262,1045, -1046,1046,1046,1046,1046,1046,1046,1046,1046,1046,1046,1046,1046,1046,1046,1046, -1046,1046,1046,1046,1046,1046,1046,1046,1046,1046,262,262,262,262,262,1047, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +1120,1120,1120,1120,1120,1120,1120,1120,1120,1120,1120,1120,1120,1120,1120,1120, +1120,1120,1120,1120,1120,1120,1121,1121,1121,1121,1121,1121,297,297,297,1122, +1123,1123,1123,1123,1123,1123,1123,1123,1123,1123,1123,1123,1123,1123,1123,1123, +1123,1123,1123,1123,1123,1123,1123,1123,1123,1123,297,297,297,297,297,1124, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 160 */ -1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048, -1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048,1048, -1049,1049,1049,1049,1049,1049,1049,1049,1049,1049,1049,1049,1049,1049,1049,1049, -1049,1049,1049,1049,1049,1049,1049,1049,262,262,262,262,1050,1050,1049,1049, -1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050, -262,262,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050, 
-1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050, -1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050,1050, +1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125, +1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125,1125, +1126,1126,1126,1126,1126,1126,1126,1126,1126,1126,1126,1126,1126,1126,1126,1126, +1126,1126,1126,1126,1126,1126,1126,1126,297,297,297,297,1127,1127,1126,1126, +1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127, +297,297,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127, +1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127, +1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127, /* block 161 */ -1051,1052,1052,1052,262,1052,1052,262,262,262,262,262,1052,1052,1052,1052, -1051,1051,1051,1051,262,1051,1051,1051,262,1051,1051,1051,1051,1051,1051,1051, -1051,1051,1051,1051,1051,1051,1051,1051,1051,1051,1051,1051,1051,1051,1051,1051, -1051,1051,1051,1051,1051,1051,262,262,1053,1053,1053,262,262,262,262,1054, -1055,1055,1055,1055,1055,1055,1055,1055,1055,262,262,262,262,262,262,262, -1056,1056,1056,1056,1056,1056,1057,1057,1056,262,262,262,262,262,262,262, -1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058, -1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1058,1059,1059,1060, +1128,1129,1129,1129,297,1129,1129,297,297,297,297,297,1129,1129,1129,1129, +1128,1128,1128,1128,297,1128,1128,1128,297,1128,1128,1128,1128,1128,1128,1128, +1128,1128,1128,1128,1128,1128,1128,1128,1128,1128,1128,1128,1128,1128,1128,1128, +1128,1128,1128,1128,1128,1128,297,297,1130,1130,1130,297,297,297,297,1131, +1132,1132,1132,1132,1132,1132,1132,1132,1132,297,297,297,297,297,297,297, +1133,1133,1133,1133,1133,1133,1134,1134,1133,297,297,297,297,297,297,297, 
+1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135, +1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1136,1136,1137, /* block 162 */ -1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061, -1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1061,1062,1062,1062, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -1063,1063,1063,1063,1063,1063,1063,1063,1064,1063,1063,1063,1063,1063,1063,1063, -1063,1063,1063,1063,1063,1063,1063,1063,1063,1063,1063,1063,1063,1063,1063,1063, -1063,1063,1063,1063,1063,1065,1065,262,262,262,262,1066,1066,1066,1066,1066, -1067,1067,1068,1067,1067,1067,1069,262,262,262,262,262,262,262,262,262, +1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138, +1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1138,1139,1139,1139, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +1140,1140,1140,1140,1140,1140,1140,1140,1141,1140,1140,1140,1140,1140,1140,1140, +1140,1140,1140,1140,1140,1140,1140,1140,1140,1140,1140,1140,1140,1140,1140,1140, +1140,1140,1140,1140,1140,1142,1142,297,297,297,297,1143,1143,1143,1143,1143, +1144,1144,1145,1144,1144,1144,1146,297,297,297,297,297,297,297,297,297, /* block 163 */ -1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070, -1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070, -1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070,1070, -1070,1070,1070,1070,1070,1070,262,262,262,1071,1072,1072,1072,1072,1072,1072, -1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073,1073, -1073,1073,1073,1073,1073,1073,262,262,1074,1074,1074,1074,1074,1074,1074,1074, -1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075,1075, 
-1075,1075,1075,262,262,262,262,262,1076,1076,1076,1076,1076,1076,1076,1076, +1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147, +1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147, +1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147,1147, +1147,1147,1147,1147,1147,1147,297,297,297,1148,1149,1149,1149,1149,1149,1149, +1150,1150,1150,1150,1150,1150,1150,1150,1150,1150,1150,1150,1150,1150,1150,1150, +1150,1150,1150,1150,1150,1150,297,297,1151,1151,1151,1151,1151,1151,1151,1151, +1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152, +1152,1152,1152,297,297,297,297,297,1153,1153,1153,1153,1153,1153,1153,1153, /* block 164 */ -1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077, -1077,1077,262,262,262,262,262,262,262,1078,1078,1078,1078,262,262,262, -262,262,262,262,262,262,262,262,262,1079,1079,1079,1079,1079,1079,1079, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154, +1154,1154,297,297,297,297,297,297,297,1155,1155,1155,1155,297,297,297, +297,297,297,297,297,297,297,297,297,1156,1156,1156,1156,1156,1156,1156, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 165 */ -1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080, 
-1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080, -1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080, -1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080,1080, -1080,1080,1080,1080,1080,1080,1080,1080,1080,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157, +1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157, +1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157, +1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157,1157, +1157,1157,1157,1157,1157,1157,1157,1157,1157,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 166 */ -1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081, -1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081, -1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081,1081, -1081,1081,1081,262,262,262,262,262,262,262,262,262,262,262,262,262, -1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082, -1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082, -1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082,1082, -1082,1082,1082,262,262,262,262,262,262,262,1083,1083,1083,1083,1083,1083, +1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158, +1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158, 
+1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158,1158, +1158,1158,1158,297,297,297,297,297,297,297,297,297,297,297,297,297, +1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159, +1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159, +1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,1159, +1159,1159,1159,297,297,297,297,297,297,297,1160,1160,1160,1160,1160,1160, /* block 167 */ -1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084, -1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084,1084, -1084,1084,1085,1085,1086,1086,1086,1086,302,302,302,302,302,302,302,302, -1087,1087,1087,1087,1087,1087,1087,1087,1087,1087,302,302,302,302,302,302, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161, +1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161,1161, +1161,1161,1162,1162,1163,1163,1163,1163,340,340,340,340,340,340,340,340, +1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,340,340,340,340,340,340, +1165,1165,1165,1165,1165,1165,1165,1165,1165,1165,1166,1166,1166,1166,1167,1166, +1168,1168,1168,1168,1168,1168,1168,1168,1168,1168,1168,1168,1168,1168,1168,1168, +1168,1168,1168,1168,1168,1168,297,297,297,1169,1170,1171,1171,1171,1172,1173, +1174,1174,1174,1174,1174,1174,1174,1174,1174,1174,1174,1174,1174,1174,1174,1174, /* block 168 */ -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, 
-262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +1174,1174,1174,1174,1174,1174,297,297,297,297,297,297,297,297,1175,1175, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 169 */ -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088, -1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,1088,262, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176, +1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,1176,297, /* block 170 */ 
-1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089, -1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,1089, -1089,1089,1089,1089,1089,1089,1089,1089,1089,1089,262,1090,1090,1091,262,262, -1089,1089,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,291,291,291, +1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177, +1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,1177, +1177,1177,1177,1177,1177,1177,1177,1177,1177,1177,297,1178,1178,1179,297,297, +1177,1177,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +340,340,321,321,321,340,340,340,340,340,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,316,333,333,333, /* block 171 */ -1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092, -1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1092,1093,1093,1093, -1093,1093,1093,1093,1093,1093,1093,1092,262,262,262,262,262,262,262,262, -1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094,1094, -1094,1094,1094,1094,1094,1094,1095,1095,1095,1095,1095,1095,1095,1095,1095,1095, -1095,1096,1096,1096,1096,1097,1097,1097,1097,1097,302,302,302,302,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -1098,1098,1098,1098,1098,1098,1098,1098,1098,1098,1098,1098,1098,1098,1098,1098, +1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180, +1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1180,1181,1181,1181, 
+1181,1181,1181,1181,1181,1181,1181,1180,297,297,297,297,297,297,297,297, +1182,1182,1182,1182,1182,1182,1182,1182,1182,1182,1182,1182,1182,1182,1182,1182, +1182,1182,1182,1182,1182,1182,1183,1183,1183,1183,1183,1183,1183,1183,1183,1183, +1183,1184,1184,1184,1184,1185,1185,1185,1185,1185,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +1186,1186,1186,1186,1186,1186,1186,1186,1186,1186,1186,1186,1186,1186,1186,1186, /* block 172 */ -1098,1098,1099,1099,1099,1099,1100,1100,1100,1100,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101,1101, -1101,1101,1101,1101,1101,1102,1102,1102,1102,1102,1102,1102,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -1103,1103,1103,1103,1103,1103,1103,1103,1103,1103,1103,1103,1103,1103,1103,1103, -1103,1103,1103,1103,1103,1103,1103,262,262,262,262,262,262,262,262,262, +1186,1186,1187,1187,1187,1187,1188,1188,1188,1188,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189, +1189,1189,1189,1189,1189,1190,1190,1190,1190,1190,1190,1190,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +1191,1191,1191,1191,1191,1191,1191,1191,1191,1191,1191,1191,1191,1191,1191,1191, +1191,1191,1191,1191,1191,1191,1191,297,297,297,297,297,297,297,297,297, /* block 173 */ -1104,1105,1104,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106, -1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106, -1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106,1106, -1106,1106,1106,1106,1106,1106,1106,1106,1105,1105,1105,1105,1105,1105,1105,1105, 
-1105,1105,1105,1105,1105,1105,1107,1108,1108,1109,1109,1109,1109,1109,163,163, -163,163,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110,1110, -1110,1110,1110,1110,1110,1110,1111,1111,1111,1111,1111,1111,1111,1111,1111,1111, -1107,1106,1106,1105,1105,1106,163,163,163,163,163,163,163,163,163,1112, +1192,1193,1192,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194, +1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194, +1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194,1194, +1194,1194,1194,1194,1194,1194,1194,1194,1193,1193,1193,1193,1193,1193,1193,1193, +1193,1193,1193,1193,1193,1193,1195,1196,1196,1197,1197,1197,1197,1197,196,196, +196,196,1198,1198,1198,1198,1198,1198,1198,1198,1198,1198,1198,1198,1198,1198, +1198,1198,1198,1198,1198,1198,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199, +1195,1194,1194,1193,1193,1194,196,196,196,196,196,196,196,196,196,1200, /* block 174 */ -1113,1113,1114,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115, -1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115, -1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115,1115, -1114,1114,1114,1113,1113,1113,1113,1114,1114,1116,1117,1118,1118,1119,1120,1120, -1120,1120,1113,163,163,163,163,163,163,163,163,163,163,1119,163,163, -1121,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121, -1121,1121,1121,1121,1121,1121,1121,1121,1121,163,163,163,163,163,163,163, -1122,1122,1122,1122,1122,1122,1122,1122,1122,1122,163,163,163,163,163,163, +1201,1201,1202,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203, +1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203, +1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203,1203, +1202,1202,1202,1201,1201,1201,1201,1202,1202,1204,1205,1206,1206,1207,1208,1208, 
+1208,1208,1201,196,196,196,196,196,196,196,196,196,196,1207,196,196, +1209,1209,1209,1209,1209,1209,1209,1209,1209,1209,1209,1209,1209,1209,1209,1209, +1209,1209,1209,1209,1209,1209,1209,1209,1209,196,196,196,196,196,196,196, +1210,1210,1210,1210,1210,1210,1210,1210,1210,1210,196,196,196,196,196,196, /* block 175 */ -1123,1123,1123,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124, -1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124,1124, -1124,1124,1124,1124,1124,1124,1124,1123,1123,1123,1123,1123,1125,1123,1123,1123, -1123,1123,1123,1126,1126,163,1127,1127,1127,1127,1127,1127,1127,1127,1127,1127, -1128,1129,1129,1129,1124,1125,1125,1124,163,163,163,163,163,163,163,163, -1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130, -1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130,1130, -1130,1130,1130,1131,1132,1132,1130,163,163,163,163,163,163,163,163,163, +1211,1211,1211,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212, +1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,1212, +1212,1212,1212,1212,1212,1212,1212,1211,1211,1211,1211,1211,1213,1211,1211,1211, +1211,1211,1211,1214,1214,196,1215,1215,1215,1215,1215,1215,1215,1215,1215,1215, +1216,1217,1217,1217,1212,1213,1213,1212,196,196,196,196,196,196,196,196, +1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218, +1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1218, +1218,1218,1218,1219,1220,1220,1218,196,196,196,196,196,196,196,196,196, /* block 176 */ -1133,1133,1134,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135, -1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135, -1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135,1135, -1135,1135,1135,1134,1134,1134,1133,1133,1133,1133,1133,1133,1133,1133,1133,1134, 
-1136,1135,1137,1137,1135,1138,1138,1139,1139,1140,1141,1141,1141,1138,1134,1133, -1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1135,1139,1135,1139,1138,1138, -163,1143,1143,1143,1143,1143,1143,1143,1143,1143,1143,1143,1143,1143,1143,1143, -1143,1143,1143,1143,1143,163,163,163,163,163,163,163,163,163,163,163, +1221,1221,1222,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223, +1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223, +1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223,1223, +1223,1223,1223,1222,1222,1222,1221,1221,1221,1221,1221,1221,1221,1221,1221,1222, +1224,1223,1225,1225,1223,1226,1226,1227,1227,1228,1229,1229,1229,1226,1222,1221, +1230,1230,1230,1230,1230,1230,1230,1230,1230,1230,1223,1227,1223,1227,1226,1226, +196,1231,1231,1231,1231,1231,1231,1231,1231,1231,1231,1231,1231,1231,1231,1231, +1231,1231,1231,1231,1231,196,196,196,196,196,196,196,196,196,196,196, /* block 177 */ -1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144, -1144,1144,163,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144, -1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1144,1145,1145,1145,1146, -1146,1146,1145,1145,1146,1147,1148,1146,1149,1149,1150,1149,1149,1151,1146,1144, -1144,1146,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232, +1232,1232,196,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232, +1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1232,1233,1233,1233,1234, +1234,1234,1233,1233,1234,1235,1236,1237,1238,1238,1239,1238,1238,1240,1234,1232, +1232,1234,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 178 */ -1152,1152,1152,1152,1152,1152,1152,163,1152,163,1152,1152,1152,1152,163,1152, -1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,1152,163,1152, -1152,1152,1152,1152,1152,1152,1152,1152,1152,1153,163,163,163,163,163,163, -1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154, -1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154, -1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1154,1155, -1156,1156,1156,1155,1155,1155,1155,1155,1155,1157,1158,163,163,163,163,163, -1159,1159,1159,1159,1159,1159,1159,1159,1159,1159,163,163,163,163,163,163, +1241,1241,1241,1241,1241,1241,1241,196,1241,196,1241,1241,1241,1241,196,1241, +1241,1241,1241,1241,1241,1241,1241,1241,1241,1241,1241,1241,1241,1241,196,1241, +1241,1241,1241,1241,1241,1241,1241,1241,1241,1242,196,196,196,196,196,196, +1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243, +1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243, +1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,1244, +1245,1245,1245,1244,1244,1244,1244,1244,1244,1246,1247,196,196,196,196,196, +1248,1248,1248,1248,1248,1248,1248,1248,1248,1248,196,196,196,196,196,196, /* block 179 */ -1160,1161,1162,1163,163,1164,1164,1164,1164,1164,1164,1164,1164,163,163,1164, -1164,163,163,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164,1164, -1164,1164,1164,1164,1164,1164,1164,1164,1164,163,1164,1164,1164,1164,1164,1164, -1164,163,1164,1164,163,1164,1164,1164,1164,1164,163,1165,1166,1164,1167,1162, -1160,1162,1162,1162,1162,163,163,1162,1162,163,163,1162,1162,1168,163,163, -1164,163,163,163,163,163,163,1167,163,163,163,163,163,1169,1164,1164, 
-1164,1164,1162,1162,163,163,1170,1170,1170,1170,1170,1170,1170,163,163,163, -1170,1170,1170,1170,1170,163,163,163,163,163,163,163,163,163,163,163, +1249,1250,1251,1252,196,1253,1253,1253,1253,1253,1253,1253,1253,196,196,1253, +1253,196,196,1253,1253,1253,1253,1253,1253,1253,1253,1253,1253,1253,1253,1253, +1253,1253,1253,1253,1253,1253,1253,1253,1253,196,1253,1253,1253,1253,1253,1253, +1253,196,1253,1253,196,1253,1253,1253,1253,1253,196,1254,1255,1253,1256,1251, +1249,1251,1251,1251,1251,196,196,1251,1251,196,196,1251,1251,1257,196,196, +1253,196,196,196,196,196,196,1256,196,196,196,196,196,1258,1253,1253, +1253,1253,1251,1251,196,196,1259,1259,1259,1259,1259,1259,1259,196,196,196, +1259,1259,1259,1259,1259,196,196,196,196,196,196,196,196,196,196,196, /* block 180 */ -1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171, -1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171, -1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171,1171, -1171,1171,1171,1171,1171,1172,1172,1172,1173,1173,1173,1173,1173,1173,1173,1173, -1172,1172,1174,1173,1173,1172,1175,1171,1171,1171,1171,1176,1176,1177,1178,1178, -1179,1179,1179,1179,1179,1179,1179,1179,1179,1179,1177,1177,163,1178,1180,1171, -1171,1171,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,196,1260,196,196,1260,196, +1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260, +1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260,1260, +1260,1260,1260,1260,1260,1260,196,1260,1261,1262,1262,1263,1263,1263,1263,1263, +1263,196,1261,196,196,1261,196,1261,1261,1261,1262,196,1262,1262,1264,1265, +1264,1266,1267,1268,1269,1269,196,1270,1270,196,196,196,196,196,196,196, +196,1271,1271,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 181 */ -1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181, -1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181, -1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181,1181, -1182,1183,1183,1184,1184,1184,1184,1184,1184,1183,1184,1183,1183,1182,1183,1184, -1184,1183,1185,1186,1181,1181,1187,1181,163,163,163,163,163,163,163,163, -1188,1188,1188,1188,1188,1188,1188,1188,1188,1188,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272, +1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272, +1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272,1272, +1272,1272,1272,1272,1272,1273,1273,1273,1274,1274,1274,1274,1274,1274,1274,1274, +1273,1273,1275,1274,1274,1273,1276,1272,1272,1272,1272,1277,1277,1278,1279,1279, +1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1278,1278,196,1279,1281,1272, +1272,1272,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 182 */ -1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189, -1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189, -1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1189,1190, -1191,1191,1192,1192,1192,1192,163,163,1191,1191,1191,1191,1192,1192,1191,1193, -1194,1195,1196,1196,1197,1197,1198,1198,1198,1196,1196,1196,1196,1196,1196,1196, -1196,1196,1196,1196,1196,1196,1196,1196,1189,1189,1189,1189,1192,1192,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, 
+1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282, +1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282, +1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282, +1283,1284,1284,1285,1285,1285,1285,1285,1285,1284,1285,1284,1284,1283,1284,1285, +1285,1284,1286,1287,1282,1282,1288,1282,196,196,196,196,196,196,196,196, +1289,1289,1289,1289,1289,1289,1289,1289,1289,1289,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 183 */ -1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199, -1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199, -1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199,1199, -1200,1200,1200,1201,1201,1201,1201,1201,1201,1201,1201,1200,1200,1201,1200,1202, -1201,1203,1203,1204,1199,163,163,163,163,163,163,163,163,163,163,163, -1205,1205,1205,1205,1205,1205,1205,1205,1205,1205,163,163,163,163,163,163, -531,531,531,531,531,531,531,531,531,531,531,531,531,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290, +1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290, +1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1290,1291, +1292,1292,1293,1293,1293,1293,196,196,1292,1292,1292,1292,1293,1293,1292,1294, +1295,1296,1297,1297,1298,1298,1299,1299,1299,1297,1297,1297,1297,1297,1297,1297, +1297,1297,1297,1297,1297,1297,1297,1297,1290,1290,1290,1290,1293,1293,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 184 */ -1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206, 
-1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206, -1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1206,1207,1208,1207,1208,1208, -1207,1207,1207,1207,1207,1207,1209,1210,1206,1211,163,163,163,163,163,163, -1212,1212,1212,1212,1212,1212,1212,1212,1212,1212,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300, +1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300, +1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300, +1301,1301,1301,1302,1302,1302,1302,1302,1302,1302,1302,1301,1301,1302,1301,1303, +1302,1304,1304,1305,1300,196,196,196,196,196,196,196,196,196,196,196, +1306,1306,1306,1306,1306,1306,1306,1306,1306,1306,196,196,196,196,196,196, +581,581,581,581,581,581,581,581,581,581,581,581,581,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 185 */ -1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,1213, -1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,1213,163,163,1214,1214,1214, -1215,1215,1214,1214,1214,1214,1216,1214,1214,1214,1214,1217,163,163,163,163, -1218,1218,1218,1218,1218,1218,1218,1218,1218,1218,1219,1219,1220,1220,1220,1221, -1213,1213,1213,1213,1213,1213,1213,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307, +1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307, +1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1307,1308,1309,1308,1309,1309, 
+1308,1308,1308,1308,1308,1308,1310,1311,1307,1312,196,196,196,196,196,196, +1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,196,196,196,196,196,196, +522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522, +522,522,522,522,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 186 */ -1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222, -1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222, -1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1222,1223,1223,1223,1224, -1224,1224,1224,1224,1224,1224,1224,1224,1223,1225,1226,1227,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314, +1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,196,196,1315,1316,1315, +1317,1317,1315,1315,1315,1315,1316,1315,1315,1315,1315,1318,196,196,196,196, +1319,1319,1319,1319,1319,1319,1319,1319,1319,1319,1320,1320,1321,1321,1321,1322, +1314,1314,1314,1314,1314,1314,1314,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 187 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228, -1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228,1228, -1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229, 
-1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229,1229, -1230,1230,1230,1230,1230,1230,1230,1230,1230,1230,1231,1231,1231,1231,1231,1231, -1231,1231,1231,163,163,163,163,163,163,163,163,163,163,163,163,1232, +1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323, +1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323, +1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1324,1324,1324,1325, +1325,1325,1325,1325,1325,1325,1325,1325,1324,1326,1327,1328,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 188 */ -1233,1233,1233,1233,1233,1233,1233,163,163,1233,163,163,1233,1233,1233,1233, -1233,1233,1233,1233,163,1233,1233,163,1233,1233,1233,1233,1233,1233,1233,1233, -1233,1233,1233,1233,1233,1233,1233,1233,1233,1233,1233,1233,1233,1233,1233,1233, -1234,1235,1235,1235,1235,1235,163,1235,1235,163,163,1236,1236,1237,1238,1239, -1235,1239,1235,1240,1241,1242,1241,163,163,163,163,163,163,163,163,163, -1243,1243,1243,1243,1243,1243,1243,1243,1243,1243,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329, +1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329,1329, +1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330, +1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330, +1331,1331,1331,1331,1331,1331,1331,1331,1331,1331,1332,1332,1332,1332,1332,1332, 
+1332,1332,1332,196,196,196,196,196,196,196,196,196,196,196,196,1333, /* block 189 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1244,1244,1244,1244,1244,1244,1244,1244,163,163,1244,1244,1244,1244,1244,1244, -1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244, -1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244,1244, -1244,1245,1245,1245,1246,1246,1246,1246,163,163,1246,1246,1245,1245,1245,1245, -1247,1244,1248,1244,1245,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1334,1334,1334,1334,1334,1334,1334,196,196,1334,196,196,1334,1334,1334,1334, +1334,1334,1334,1334,196,1334,1334,196,1334,1334,1334,1334,1334,1334,1334,1334, +1334,1334,1334,1334,1334,1334,1334,1334,1334,1334,1334,1334,1334,1334,1334,1334, +1335,1336,1336,1336,1336,1336,196,1336,1336,196,196,1337,1337,1338,1339,1340, +1336,1340,1336,1341,1342,1343,1342,196,196,196,196,196,196,196,196,196, +1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 190 */ -1249,1250,1250,1250,1250,1250,1250,1251,1251,1250,1250,1249,1249,1249,1249,1249, -1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249, -1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249,1249, -1249,1249,1249,1252,1253,1250,1250,1250,1250,1254,1255,1250,1250,1250,1250,1256, -1256,1256,1257,1257,1256,1256,1256,1253,163,163,163,163,163,163,163,163, -1258,1259,1259,1259,1259,1259,1259,1260,1260,1259,1259,1259,1258,1258,1258,1258, -1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258, -1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258,1258, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1345,1345,1345,1345,1345,1345,1345,1345,196,196,1345,1345,1345,1345,1345,1345, +1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345, +1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345,1345, +1345,1346,1346,1346,1347,1347,1347,1347,196,196,1347,1347,1346,1346,1346,1346, +1348,1345,1349,1345,1346,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 191 */ -1258,1258,1258,1258,1261,1261,1261,1261,1261,1261,1259,1259,1259,1259,1259,1259, -1259,1259,1259,1259,1259,1259,1259,1260,1262,1263,1264,1265,1265,1258,1264,1264, -1264,1266,1266,163,163,163,163,163,163,163,163,163,163,163,163,163, -496,496,496,496,496,496,496,496,496,496,496,496,496,496,496,496, -1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267, -1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267, -1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267,1267, -1267,1267,1267,1267,1267,1267,1267,1267,1267,163,163,163,163,163,163,163, +1350,1351,1351,1351,1351,1351,1351,1352,1352,1351,1351,1350,1350,1350,1350,1350, +1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350, +1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350,1350, +1350,1350,1350,1353,1354,1351,1351,1351,1351,1355,1356,1351,1351,1351,1351,1357, +1357,1357,1358,1358,1357,1357,1357,1354,196,196,196,196,196,196,196,196, +1359,1360,1360,1360,1360,1360,1360,1361,1361,1360,1360,1360,1359,1359,1359,1359, +1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359, +1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359, /* block 192 */ -343,343,343,343,343,343,343,343,343,343,163,163,163,163,163,163, 
-163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1359,1359,1359,1359,1362,1362,1362,1362,1362,1362,1360,1360,1360,1360,1360,1360, +1360,1360,1360,1360,1360,1360,1360,1361,1363,1364,1365,1366,1366,1359,1365,1365, +1365,1367,1367,196,196,196,196,196,196,196,196,196,196,196,196,196, +545,545,545,545,545,545,545,545,545,545,545,545,545,545,545,545, +1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368, +1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368, +1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368, +1368,1368,1368,1368,1368,1368,1368,1368,1368,196,196,196,196,196,196,196, /* block 193 */ -1268,1268,1268,1268,1268,1268,1268,1268,1268,163,1268,1268,1268,1268,1268,1268, -1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268, -1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1268,1269, -1270,1270,1270,1270,1270,1270,1270,163,1270,1270,1270,1270,1270,1270,1269,1271, -1268,1272,1272,1273,1274,1274,163,163,163,163,163,163,163,163,163,163, -1275,1275,1275,1275,1275,1275,1275,1275,1275,1275,1276,1276,1276,1276,1276,1276, -1276,1276,1276,1276,1276,1276,1276,1276,1276,1276,1276,1276,1276,163,163,163, -1277,1278,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279, +385,385,385,385,385,385,385,385,385,385,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 194 */ -1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279,1279, -163,163,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280,1280, -1280,1280,1280,1280,1280,1280,1280,1280,163,1281,1280,1280,1280,1280,1280,1280, -1280,1281,1280,1280,1281,1280,1280,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, +1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, +1369,1370,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1371,1371,1371,1371,1371,1371,1371,1371,1371,1371,196,196,196,196,196,196, /* block 195 */ -1282,1282,1282,1282,1282,1282,1282,163,1282,1282,163,1282,1282,1282,1282,1282, -1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282, -1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282,1282, -1282,1283,1283,1283,1283,1283,1283,163,163,163,1283,163,1283,1283,163,1283, -1283,1283,1284,1283,1285,1285,1286,1283,163,163,163,163,163,163,163,163, -1287,1287,1287,1287,1287,1287,1287,1287,1287,1287,163,163,163,163,163,163, 
-1288,1288,1288,1288,1288,1288,163,1288,1288,163,1288,1288,1288,1288,1288,1288, -1288,1288,1288,1288,1288,1288,1288,1288,1288,1288,1288,1288,1288,1288,1288,1288, +1372,1372,1372,1372,1372,1372,1372,1372,1372,196,1372,1372,1372,1372,1372,1372, +1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372, +1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1372,1373, +1374,1374,1374,1374,1374,1374,1374,196,1374,1374,1374,1374,1374,1374,1373,1375, +1372,1376,1376,1377,1378,1378,196,196,196,196,196,196,196,196,196,196, +1379,1379,1379,1379,1379,1379,1379,1379,1379,1379,1380,1380,1380,1380,1380,1380, +1380,1380,1380,1380,1380,1380,1380,1380,1380,1380,1380,1380,1380,196,196,196, +1381,1382,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383, /* block 196 */ -1288,1288,1288,1288,1288,1288,1288,1288,1288,1288,1289,1289,1289,1289,1289,163, -1290,1290,163,1289,1289,1290,1289,1291,1288,163,163,163,163,163,163,163, -1292,1292,1292,1292,1292,1292,1292,1292,1292,1292,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383,1383, +196,196,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384, +1384,1384,1384,1384,1384,1384,1384,1384,196,1385,1384,1384,1384,1384,1384,1384, +1384,1385,1384,1384,1385,1384,1384,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 197 */ 
-163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1293,1293,1293,1293,1293,1293,1293,1293,1293,1293,1293,1293,1293,1293,1293,1293, -1293,1293,1293,1294,1294,1295,1295,1296,1296,163,163,163,163,163,163,163, +1386,1386,1386,1386,1386,1386,1386,196,1386,1386,196,1386,1386,1386,1386,1386, +1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386, +1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,1386, +1386,1387,1387,1387,1387,1387,1387,196,196,196,1387,196,1387,1387,196,1387, +1387,1387,1388,1387,1389,1389,1390,1387,196,196,196,196,196,196,196,196, +1391,1391,1391,1391,1391,1391,1391,1391,1391,1391,196,196,196,196,196,196, +1392,1392,1392,1392,1392,1392,196,1392,1392,196,1392,1392,1392,1392,1392,1392, +1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392, /* block 198 */ -1297,1297,1298,1299,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300, -1300,163,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300, -1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300,1300, -1300,1300,1300,1300,1299,1299,1297,1297,1297,1297,1297,163,163,163,1299,1299, -1297,1301,1302,1303,1303,1304,1304,1304,1304,1304,1304,1304,1304,1304,1304,1304, -1305,1305,1305,1305,1305,1305,1305,1305,1305,1305,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1393,1393,1393,1393,1393,196, +1394,1394,196,1393,1393,1394,1393,1395,1392,196,196,196,196,196,196,196, 
+1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 199 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -843,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1306,1306,1306,1306,1306,1306,1306,1306,1306,1306,1306,1306,1306,1306,1306,1306, -388,388,1306,388,1306,390,390,390,390,390,390,390,390,391,391,391, -391,390,390,390,390,390,390,390,390,390,390,390,390,390,390,390, -390,390,163,163,163,163,163,163,163,163,163,163,163,163,163,1307, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1397,1397,1397,1397,1397,1397,1397,1397,1397,1397,1397,1397,1397,1397,1397,1397, +1397,1397,1397,1398,1398,1399,1399,1400,1400,196,196,196,196,196,196,196, /* blockblock 201 */ -1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308, -1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, 
-163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +914,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1411,1411,1411,1411,1411,1411,1411,1411,1411,1411,1411,1411,1411,1411,1411,1411, +435,435,1411,435,1411,437,437,437,437,437,437,437,437,438,438,438, +438,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437, +437,437,196,196,196,196,196,196,196,196,196,196,196,196,196,1412, /* block 202 */ -1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309, -1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309, -1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309, -1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309, -1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309, -1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309, -1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,1309,163, -1310,1310,1310,1310,1310,163,163,163,163,163,163,163,163,163,163,163, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, /* 
block 203 */ -1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308, -1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308, -1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308, -1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308,1308, -1308,1308,1308,1308,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 204 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311, -1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311, -1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311, -1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311, -1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311, -1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311,1311, -1311,1312,1312,163,163,163,163,163,163,163,163,163,163,163,163,163, +1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414, +1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414, 
+1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414, +1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414, +1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414, +1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414, +1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,1414,196, +1415,1415,1415,1415,1415,196,196,196,196,196,196,196,196,196,196,196, /* blockblock 206 */ -1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313, -1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313, -1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313,1313, -1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314, -1315,1313,1313,1313,1313,1313,1313,1316,1316,1316,1316,1316,1316,1316,1316,1316, -1316,1316,1316,1316,1316,1316,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416, +1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416, +1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416, +1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416, +1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416, +1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416,1416, +1416,1417,1417,196,196,196,196,196,196,196,196,196,196,196,196,196, /* blockblock 208 */ -1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317, -1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317, 
-1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317, -1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317,1317, -1317,1317,1317,1317,1317,1317,1317,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1419,1419,1419,1419,1419,1419,1419,1419,1419,1419,1419,1419,1419,1419,1419,1419, +1420,1418,1418,1418,1418,1418,1418,1421,1421,1421,1421,1421,1421,1421,1421,1421, +1421,1421,1421,1421,1421,1421,196,196,196,196,196,196,196,196,196,196, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, /* block 209 */ -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, 
+1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418, +1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,1418,196,196,196,196,196, /* block 210 */ -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,859,859,859,859,859,859,859, -859,859,859,859,859,859,859,859,859,163,163,163,163,163,163,163, -1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318, -1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,1318,163, -1319,1319,1319,1319,1319,1319,1319,1319,1319,1319,163,163,163,163,1320,1320, -1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, /* block 211 */ -1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321, -1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321, -1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321, -1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,1321,163, 
-1322,1322,1322,1322,1322,1322,1322,1322,1322,1322,163,163,163,163,163,163, -1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323, -1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,1323,163,163, -1324,1324,1324,1324,1324,1325,163,163,163,163,163,163,163,163,163,163, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +1422,1422,1422,1422,1422,1422,1422,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 212 */ -1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326, -1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326, -1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326, -1327,1327,1327,1327,1327,1327,1327,1328,1328,1329,1330,1330,1331,1331,1331,1331, -1332,1332,1333,1333,1328,1331,163,163,163,163,163,163,163,163,163,163, -1334,1334,1334,1334,1334,1334,1334,1334,1334,1334,163,1335,1335,1335,1335,1335, -1335,1335,163,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326, -1326,1326,1326,1326,1326,1326,1326,1326,163,163,163,163,163,1326,1326,1326, +1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423, +1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1423,1424,1424, +1424,1424,1424,1424,1424,1424,1424,1424,1424,1424,1425,1425,1425,1424,1424,1426, +1427,1427,1427,1427,1427,1427,1427,1427,1427,1427,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 213 */ -1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326,1326, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, /* block 214 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336, -1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336,1336, -1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337, -1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337,1337, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, 
+930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,930,930,930,930,930,930,930, +930,930,930,930,930,930,930,930,930,196,196,196,196,196,196,196, +1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428, +1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,1428,196, +1429,1429,1429,1429,1429,1429,1429,1429,1429,1429,196,196,196,196,1430,1430, +1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431, /* block 215 */ -1338,1338,1338,1338,1338,1338,1338,1338,1338,1338,1338,1338,1338,1338,1338,1338, -1338,1338,1338,1338,1338,1338,1338,1339,1340,1341,1341,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431, +1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431, +1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431, +1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,196, +1432,1432,1432,1432,1432,1432,1432,1432,1432,1432,196,196,196,196,196,196, +1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433, +1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,1433,196,196, +1434,1434,1434,1434,1434,1435,196,196,196,196,196,196,196,196,196,196, /* block 216 */ -1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342, -1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342, -1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342, 
-1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342, -1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,1342,163,163,163,163,1343, -1342,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344, -1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344, -1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344, +1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436, +1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436, +1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436, +1437,1437,1437,1437,1437,1437,1437,1438,1438,1439,1440,1440,1441,1441,1441,1441, +1442,1442,1443,1443,1438,1441,196,196,196,196,196,196,196,196,196,196, +1444,1444,1444,1444,1444,1444,1444,1444,1444,1444,196,1445,1445,1445,1445,1445, +1445,1445,196,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436, +1436,1436,1436,1436,1436,1436,1436,1436,196,196,196,196,196,1436,1436,1436, /* block 217 */ -1344,1344,1344,1344,1344,1344,1344,1344,163,163,163,163,163,163,163,1345, -1345,1345,1345,1346,1346,1346,1346,1346,1346,1346,1346,1346,1346,1346,1346,1346, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1347,1348,1349,800,1350,163,163,163,163,163,163,163,163,163,163,163, -1351,1351,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436,1436, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* blockblockblockblock 221 */ -1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353, -1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353, -1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353, -1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353, -1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353,1353, -1353,1353,1353,1353,1353,1353,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459, +1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459, +1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459, +1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459, +1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,1459,196,196,196,196,1460, +1459,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461, +1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461, +1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461,1461, /* block 222 */ -1352,1352,1352,1352,1352,1352,1352,1352,1352,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, 
-163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1461,1461,1461,1461,1461,1461,1461,1461,196,196,196,196,196,196,196,1462, +1462,1462,1462,1463,1463,1463,1463,1463,1463,1463,1463,1463,1463,1463,1463,1463, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1464,1465,1466,867,1467,196,196,196,196,196,196,196,196,196,196,196, +1468,1468,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 223 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1354,1354,1354,1354,163,1354,1354,1354,1354,1354,1354,1354,163,1354,1354,163, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, /* block 224 */ -825,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, 
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469,1469, +1469,1469,1469,1469,1469,1469,1469,1469,196,196,196,196,196,196,196,196, /* block 225 */ -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, 
+1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, /* block 226 */ -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820, -825,825,825,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,820,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -820,820,820,163,163,825,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,825,825,825,825,163,163,163,163,163,163,163,163, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470,1470, +1470,1470,1470,1470,1470,1470,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,1470, /* block 227 */ -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, 
-1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, +1469,1469,1469,1469,1469,1469,1469,1469,1469,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 228 */ -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355, -1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,1355,163,163,163,163, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1471,1471,1471,1471,196,1471,1471,1471,1471,1471,1471,1471,196,1471,1471,196, /* block 229 */ 
-1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,163,163,163,163,163, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,163,163,163, +896,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, /* block 230 */ -1356,1356,1356,1356,1356,1356,1356,1356,1356,163,163,163,163,163,163,163, -1356,1356,1356,1356,1356,1356,1356,1356,1356,1356,163,163,1357,1358,1359,1360, -1361,1361,1361,1361,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, 
+891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, /* block 231 */ -154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, -154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, -154,154,154,154,154,154,154,154,154,154,154,154,154,154,163,163, -154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, -154,154,154,154,154,154,154,163,163,163,163,163,163,163,163,163, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891, +896,896,896,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,891,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +891,891,891,196,196,896,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,896,896,896,896,196,196,196,196,196,196,196,196, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, /* block 232 */ -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, 
+1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, /* block 233 */ -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472, +1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,1472,196,196,196,196, /* block 234 */ -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, 
-461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,163,163,163,163,163,163,163,163,163,163, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,196,196,196,196,196, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,196,196,196, /* block 235 */ -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,163,163,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,1362,1363,154,154,154,461,461,461,1364,1365,1365, -1365,1365,1365, 51, 51, 51, 51, 51, 51, 51, 51,154,154,154,154,154, +1473,1473,1473,1473,1473,1473,1473,1473,1473,196,196,196,196,196,196,196, +1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,196,196,1474,1475,1476,1477, +1478,1478,1478,1478,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 236 */ -154,154,154,461,461,154,154,154,154,154,154,154,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,154,154,154,154,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,724,724,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, /* block 237 */ -1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001, -1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001, -1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001, -1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001,1001, -1001,1001,1366,1366,1366,1001,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, 
+781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +1479,1479,1479,1479,1479,1479,1479,1479,1479,1479,196,196,196,196,196,196, /* block 238 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -836,836,836,836,836,836,836,836,836,836,836,836,836,836,836,836, -836,836,836,836,163,163,163,163,163,163,163,163,163,163,163,163, -836,836,836,836,836,836,836,836,836,836,836,836,836,836,836,836, -836,836,836,836,163,163,163,163,163,163,163,163,163,163,163,163, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 239 */ -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,163,163,163,163,163,163,163,163,163, 
-833,833,833,833,833,833,833,833,833,833,833,833,833,833,833,833, -833,833,836,836,836,836,836,836,836,163,163,163,163,163,163,163, +176,176,176,176,176,176,176,176,176,176,176,176,176,176,176,176, +176,176,176,176,176,176,176,176,176,176,176,176,176,176,176,176, +176,176,176,176,176,176,176,176,176,176,176,176,176,176,196,196, +176,176,176,176,176,176,176,176,176,176,176,176,176,176,176,176, +176,176,176,176,176,176,176,196,196,196,196,196,196,196,196,196, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, /* block 240 */ -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,726,726,726,726,726,726, -726,726,737,737,726,726,726,726,726,726,726,726,726,726,726,726, -726,726,726,726,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,726,726, -726,726,726,726,726,163,737,737,726,726,726,726,726,726,726,726, -726,726,726,726,726,726,726,726,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 241 */ -725,725,726,726,726,726,726,726,726,726,737,737,726,726,726,726, -726,726,726,726,726,726,726,726,726,726,726,726,725,163,725,725, -163,163,725,163,163,725,725,163,163,725,725,725,725,163,725,725, 
-725,725,725,725,725,725,726,726,726,726,163,726,163,726,737,737, -726,726,726,726,163,726,726,726,726,726,726,726,726,726,726,726, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,726,726,726,726,726,726, -726,726,737,737,726,726,726,726,726,726,726,726,726,726,726,726, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, /* block 242 */ -726,726,726,726,725,725,163,725,725,725,725,163,163,725,725,725, -725,725,725,725,725,163,725,725,725,725,725,725,725,163,726,726, -726,726,726,726,726,726,737,737,726,726,726,726,726,726,726,726, -726,726,726,726,726,726,726,726,725,725,163,725,725,725,725,163, -725,725,725,725,725,163,725,163,163,163,725,725,725,725,725,725, -725,163,726,726,726,726,726,726,726,726,737,737,726,726,726,726, -726,726,726,726,726,726,726,726,726,726,726,726,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,196,196,196,196,196,196,196,196,196,196, /* block 243 */ 
-725,725,725,725,725,725,726,726,726,726,726,726,726,726,737,737, -726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,726,726,726,726,726,726, -726,726,737,737,726,726,726,726,726,726,726,726,726,726,726,726, -726,726,726,726,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,726,726, -726,726,726,726,726,726,737,737,726,726,726,726,726,726,726,726, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,196,196,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,1480,1480,176,176,176,510,510,510,1481,1481,1481, +1481,1481,1481, 51, 51, 51, 51, 51, 51, 51, 51,176,176,176,176,176, /* block 244 */ -726,726,726,726,726,726,726,726,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,726,726,726,726,726,726,726,726,737,737,726,726,726,726, -726,726,726,726,726,726,726,726,726,726,726,726,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,726,726,726,726,726,726,726,726,737,737, -726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, +176,176,176,510,510,176,176,176,176,176,176,176,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,176,176,176,176,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, 
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,781,781,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 245 */ -725,725,725,725,725,725,725,725,725,725,726,726,726,726,726,726, -726,726,737,737,726,726,726,726,726,726,726,726,726,726,726,726, -726,726,726,726,726,726,163,163,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,1367,726,726,726,726,726,726,726,726,726,726,726,726,726,726, -726,726,726,726,726,726,726,726,726,726,726,716,726,726,726,726, -726,726,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,1367,726,726,726,726, +1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077, +1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077, +1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077, +1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077,1077, +1077,1077,1482,1482,1482,1077,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 246 */ -726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, -726,726,726,726,726,716,726,726,726,726,726,726,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,1367,726,726,726,726,726,726,726,726,726,726, -726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,716, -726,726,726,726,726,726,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,1367, -726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, +907,907,907,907,196,196,196,196,196,196,196,196,196,196,196,196, +907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907, +907,907,907,907,196,196,196,196,196,196,196,196,196,196,196,196, /* block 247 */ -726,726,726,726,726,726,726,726,726,716,726,726,726,726,726,726, -725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725, -725,725,725,725,725,725,725,725,725,1367,726,726,726,726,726,726, -726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726, -726,726,726,716,726,726,726,726,726,726,725,726,163,163,1368,1368, -1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368, -1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368, -1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368,1368, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,196,196,196,196,196,196,196,196,196, +904,904,904,904,904,904,904,904,904,904,904,904,904,904,904,904, +904,904,907,907,907,907,907,907,907,196,196,196,196,196,196,196, /* block 248 */ -1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, -1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, -1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, -1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, 
-1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, -1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, -1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, -1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,783,783,783,783,783,783, +783,783,795,795,783,783,783,783,783,783,783,783,783,783,783,783, +783,783,783,783,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,783,783, +783,783,783,783,783,196,795,795,783,783,783,783,783,783,783,783, +783,783,783,783,783,783,783,783,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, /* block 249 */ -1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370, -1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370, -1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370, -1370,1370,1370,1370,1370,1370,1370,1369,1369,1369,1369,1370,1370,1370,1370,1370, -1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370, -1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370, -1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1369,1369,1369, -1369,1369,1369,1369,1369,1370,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369, +782,782,783,783,783,783,783,783,783,783,795,795,783,783,783,783, +783,783,783,783,783,783,783,783,783,783,783,783,782,196,782,782, +196,196,782,196,196,782,782,196,196,782,782,782,782,196,782,782, +782,782,782,782,782,782,783,783,783,783,196,783,196,783,795,795, +783,783,783,783,196,783,783,783,783,783,783,783,783,783,783,783, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,783,783,783,783,783,783, 
+783,783,795,795,783,783,783,783,783,783,783,783,783,783,783,783, /* block 250 */ -1369,1369,1369,1369,1370,1369,1369,1371,1372,1371,1371,1373,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,1370,1370,1370,1370,1370, -163,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370,1370, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +783,783,783,783,782,782,196,782,782,782,782,196,196,782,782,782, +782,782,782,782,782,196,782,782,782,782,782,782,782,196,783,783, +783,783,783,783,783,783,795,795,783,783,783,783,783,783,783,783, +783,783,783,783,783,783,783,783,782,782,196,782,782,782,782,196, +782,782,782,782,782,196,782,196,196,196,782,782,782,782,782,782, +782,196,783,783,783,783,783,783,783,783,795,795,783,783,783,783, +783,783,783,783,783,783,783,783,783,783,783,783,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, /* block 251 */ - 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 92, 70, 70, 70, 70, 70, - 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,644, 70, 70, 70, 70,163, -163,163,163,163,163, 70, 70, 70, 70, 70, 70,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +782,782,782,782,782,782,783,783,783,783,783,783,783,783,795,795, +783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,783,783,783,783,783,783, 
+783,783,795,795,783,783,783,783,783,783,783,783,783,783,783,783, +783,783,783,783,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,783,783, +783,783,783,783,783,783,795,795,783,783,783,783,783,783,783,783, /* block 252 */ -1374,1374,1374,1374,1374,1374,1374,163,1374,1374,1374,1374,1374,1374,1374,1374, -1374,1374,1374,1374,1374,1374,1374,1374,1374,163,163,1374,1374,1374,1374,1374, -1374,1374,163,1374,1374,163,1374,1374,1374,1374,1374,163,163,163,163,163, -858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,858, -858,858,858,858,858,858,858,858,858,858,858,858,1375,1375,858,858, -858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,858, -858,858,858,858,858,858,858,858,1375,858,858,858,858,858,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +783,783,783,783,783,783,783,783,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,783,783,783,783,783,783,783,783,795,795,783,783,783,783, +783,783,783,783,783,783,783,783,783,783,783,783,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,783,783,783,783,783,783,783,783,795,795, +783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, /* block 253 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,788, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +782,782,782,782,782,782,782,782,782,782,783,783,783,783,783,783, 
+783,783,795,795,783,783,783,783,783,783,783,783,783,783,783,783, +783,783,783,783,783,783,196,196,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,1483,783,783,783,783,783,783,783,783,783,783,783,783,783,783, +783,783,783,783,783,783,783,783,783,783,783,1483,783,783,783,783, +783,783,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,1483,783,783,783,783, /* block 254 */ -1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376, -1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376, -1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,1376,163,163,163, -1377,1377,1377,1377,1377,1377,1377,1378,1378,1378,1378,1378,1379,1379,163,163, -1380,1380,1380,1380,1380,1380,1380,1380,1380,1380,163,163,163,163,1376,1381, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783, +783,783,783,783,783,1483,783,783,783,783,783,783,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,1483,783,783,783,783,783,783,783,783,783,783, +783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,1483, +783,783,783,783,783,783,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,1483, +783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783, /* block 255 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382, -1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1382,1383,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, 
-1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384, -1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384, -1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1384,1385,1385,1385,1385, -1386,1386,1386,1386,1386,1386,1386,1386,1386,1386,163,163,163,163,163,1387, +783,783,783,783,783,783,783,783,783,1483,783,783,783,783,783,783, +782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782, +782,782,782,782,782,782,782,782,782,1483,783,783,783,783,783,783, +783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783, +783,783,783,1483,783,783,783,783,783,783,782,783,196,196,1484,1484, +1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484, +1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484, +1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484,1484, /* block 256 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1388, -1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1388,1389,1390,1390,1390,1390, -1391,1391,1391,1391,1391,1391,1391,1391,1391,1391,163,163,163,163,163,163, +1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, +1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, +1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, +1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, +1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, +1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, 
+1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, +1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, /* block 257 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -484,484,484,484,484,484,484,163,484,484,484,484,163,484,484,163, -484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,163, +1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486, +1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486, +1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486, +1486,1486,1486,1486,1486,1486,1486,1485,1485,1485,1485,1486,1486,1486,1486,1486, +1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486, +1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486, +1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1486,1485,1485,1485, +1485,1485,1485,1485,1485,1486,1485,1485,1485,1485,1485,1485,1485,1485,1485,1485, /* blockblock 259 */ -1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392, -1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392, -1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392, -1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392,1392, -1392,1392,1392,1392,1392,262,262,1393,1393,1393,1393,1393,1393,1393,1393,1393, -1394,1394,1394,1394,1394,1394,1394,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, + 
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 93, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,694, 70, 70, 70, 70,196, +196,196,196,196,196, 70, 70, 70, 70, 70, 70,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 260 */ -1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395, -1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395,1395, -1395,1395,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396, -1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396,1396, -1396,1396,1396,1396,1397,1397,1397,1398,1399,1399,1399,1400,262,262,262,262, -1401,1401,1401,1401,1401,1401,1401,1401,1401,1401,262,262,262,262,1402,1402, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +1490,1490,1490,1490,1490,1490,1490,196,1490,1490,1490,1490,1490,1490,1490,1490, +1490,1490,1490,1490,1490,1490,1490,1490,1490,196,196,1490,1490,1490,1490,1490, +1490,1490,196,1490,1490,196,1490,1490,1490,1490,1490,196,196,196,196,196, +929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929, +929,929,929,929,929,929,929,929,929,929,929,929,1491,1491,929,929, +929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929, +929,929,929,929,929,929,929,929,1491,929,929,929,929,929,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 261 */ -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, 
-262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -302,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,849, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 262 */ -1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403, -1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403, -1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1404,1403,1403,1403, -1405,1403,1403,1403,1403,302,302,302,302,302,302,302,302,302,302,302, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492, +1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492, +1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,1492,196,196,196, +1493,1493,1493,1493,1493,1493,1493,1494,1494,1494,1494,1494,1495,1495,196,196, +1496,1496,1496,1496,1496,1496,1496,1496,1496,1496,196,196,196,196,1492,1497, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 263 */ -302,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403, -1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403, -1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1404,1403, -1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,1403,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, -262,262,262,262,262,262,262,262,262,262,262,262,262,262,262,262, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498, +1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1498,1499,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500, +1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500, +1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1500,1501,1501,1501,1501, +1502,1502,1502,1502,1502,1502,1502,1502,1502,1502,196,196,196,196,196,1503, /* block 264 */ -1406,1406,1406,1406,302,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406, -1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406, -302,1406,1406,302,1406,302,302,1406,302,1406,1406,1406,1406,1406,1406,1406, -1406,1406,1406,302,1406,1406,1406,1406,302,1406,302,1406,302,302,302,302, -302,302,1406,302,302,302,302,1406,302,1406,302,1406,302,1406,1406,1406, -302,1406,1406,302,1406,302,302,1406,302,1406,302,1406,302,1406,302,1406, -302,1406,1406,302,1406,302,302,1406,1406,1406,1406,302,1406,1406,1406,1406, -1406,1406,1406,302,1406,1406,1406,1406,302,1406,1406,1406,1406,302,1406,302, 
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1504, +1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1504,1505,1506,1506,1506,1506, +1507,1507,1507,1507,1507,1507,1507,1507,1507,1507,196,196,196,196,196,196, /* block 265 */ -1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,302,1406,1406,1406,1406,1406, -1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,302,302,302,302, -302,1406,1406,1406,302,1406,1406,1406,1406,1406,302,1406,1406,1406,1406,1406, -1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,1406,302,302,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302, -274,274,302,302,302,302,302,302,302,302,302,302,302,302,302,302, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508, +1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1508,1509,1510, +1508,1511,1511,1511,1511,1511,1511,1511,1511,1511,1511,196,196,196,196,1512, /* block 266 */ -1407,1407,1407,1407,1408,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1409,1409,1409,1409, 
-1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +533,533,533,533,533,533,533,196,533,533,533,533,196,533,533,196, +533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,196, /* block 267 */ -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1409, -1409,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1409,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1408, -1409,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, 
+1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, /* block 268 */ - 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 58, 58,1407,1407,1407, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,1407, -1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410, -1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,461,461,461,461,461,461, -1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410, -1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,724,724,1407,1407,1407,1407, -1411,1411,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,1411,1411, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513,1513, +1513,1513,1513,1513,1513,297,297,1514,1514,1514,1514,1514,1514,1514,1514,1514, +1515,1515,1515,1515,1515,1515,1515,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 269 */ -1410,1410,1410,1410,1410,1410,1410,1410,1410,1410,461,461,461,461,1412,461, -461,1412,1412,1412,1412,1412,1412,1412,1412,1412,1412,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,1407,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, 
-1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, -1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413,1413, +1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516, +1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516,1516, +1516,1516,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517, +1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517,1517, +1517,1517,1517,1517,1518,1518,1518,1519,1520,1520,1520,1521,297,297,297,297, +1522,1522,1522,1522,1522,1522,1522,1522,1522,1522,297,297,297,297,1523,1523, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 270 */ -1414,1412,1415,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -461,461,461,461,461,461,461,461,461,461,1412,461,461,461,461,461, -461,461,461,461,461,461,461,461,461,461,461,461,461,461,461,1412, -461,461,1412,1412,1412,1412,1412,1415,1412,1412,1412,461,1409,1409,1409,1409, -461,461,461,461,461,461,461,461,461,1409,1409,1409,1409,1409,1409,1409, -1416,1416,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1407,1407,1407,1407,1407,1407,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, 
+297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 271 */ -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +340,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524, /* block 272 */ -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,728,1407,1407,728,728,728,728,728,728,728,728,728,1408,1408,1408, -1408,1408,1408,1408,1408,1408,728,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,728,1408,1408, 
+1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524, +1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524, +1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1525,1524,1524,1524, +1526,1524,1524,1524,1524,340,340,340,340,340,340,340,340,340,340,340, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 273 */ -1408,1408,1408,1408,1408,1417,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1407,1407,728,728,1407,728,728,728,1407,1407,728,728, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1417,1417,1417,1408,1408,1417,1408,1408,1417,1418,1418,728,728,1408, -1408,1408,1408,1408,728,728,728,728,728,728,728,728,728,728,728,728, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1407,1407,728,1408,728,1407,728,1408,1408,1408,1419,1419,1419,1419,1419, +340,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524, +1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524, +1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1525,1524, +1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,1524,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, +297,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297, /* block 274 */ -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, 
-1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,728, -1408,728,1417,1417,1408,1408,1417,1417,1417,1417,1417,1417,1417,1417,1417,1417, -1417,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1417,1417,1417,1417,1417,1417,1417,1417,1417,1417, -1417,1417,1417,1417,1417,1417,1417,1417,1417,1408,1408,1408,1417,1408,1408,1408, +1527,1527,1527,1527,340,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527, +1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527, +340,1527,1527,340,1527,340,340,1527,340,1527,1527,1527,1527,1527,1527,1527, +1527,1527,1527,340,1527,1527,1527,1527,340,1527,340,1527,340,340,340,340, +340,340,1527,340,340,340,340,1527,340,1527,340,1527,340,1527,1527,1527, +340,1527,1527,340,1527,340,340,1527,340,1527,340,1527,340,1527,340,1527, +340,1527,1527,340,1527,340,340,1527,1527,1527,1527,340,1527,1527,1527,1527, +1527,1527,1527,340,1527,1527,1527,1527,340,1527,1527,1527,1527,340,1527,340, /* block 275 */ -1408,1417,1417,1417,1408,1417,1417,1417,1408,1408,1408,1408,1408,1408,1408,1417, -1408,1417,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1417,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,728,1407,1408, +1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,340,1527,1527,1527,1527,1527, +1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,340,340,340,340, 
+340,1527,1527,1527,340,1527,1527,1527,1527,1527,340,1527,1527,1527,1527,1527, +1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340, +309,309,340,340,340,340,340,340,340,340,340,340,340,340,340,340, /* block 276 */ -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,724,724, -724,724,724,724,724,724,1407,1407,1407,728,728,1408,1408,1408,1408,1407, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1407,1407,1407,1407,1407,1407,1407,728, -728,1407,1407,728,1418,1418,728,728,728,728,1417,1407,1407,1407,1407,1407, +1528,1528,1528,1528,1529,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1530,1530,1530,1530, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, /* block 277 */ -1407,1407,1407,1407,1407,1407,1407,728,1407,1407,728,728,728,728,1407,1407, -1418,1407,1407,1407,1407,1417,1417,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1408,728,1407,1407,728,1407,1407,1407,1407,1407,1407,1407, 
-1407,728,728,1407,1407,1407,1407,1407,1407,1407,1407,1407,728,1407,1407,1407, -1407,1407,728,728,728,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,728,728,728,1407,1407,1407,1407,1407,1407,1407,1407,728,728,728,1407, -1407,728,1407,728,1407,1407,1407,1407,728,1407,1407,1407,1407,1407,1407,728, -1407,1407,1407,728,1407,1407,1407,1407,1407,1407,728,1408,1408,1408,1408,1408, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1530, +1530,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1530,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1529, +1530,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, /* block 278 */ -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1417,1417,1417,1408,1408,1408,1417,1417,1417,1417,1417, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, +814,814,814,814,814,814,814,814,814,814,814, 58, 58,1528,1528,1528, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,1528, +1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531, 
+1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,510,510,510,510,510,510, +1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531, +1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,781,781,1528,1528,1528,1528, +1532,1532,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,1532,1532, /* block 279 */ -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1417,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1417,1417,1417,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1417,1408,1408,1408,1408,1408,1407,1407,1407,1407,1407,728,1417,728,728,728, -1408,1408,1408,1407,1407,1408,1408,1408,1409,1409,1409,1409,1408,1408,1408,1408, -728,728,728,728,728,728,1407,1407,1407,728,1407,1408,1408,1409,1409,1409, -728,1407,1407,728,1408,1408,1408,1408,1408,1408,1408,1408,1408,1409,1409,1409, +1531,1531,1531,1531,1531,1531,1531,1531,1531,1531,510,510,510,510,1533,510, +510,1533,1533,1533,1533,1533,1533,1533,1533,1533,1533,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,1528,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1534,1534,1534,1534,1534,1534,1534,1534,1534,1534, +1534,1534,1534,1534,1534,1534,1534,1534,1534,1534,1534,1534,1534,1534,1534,1534, /* block 280 */ -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, 
-724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,1407,1407,1407,1409,1409,1409,1409,1407,1407,1407,1407,1407, +1535,1533,1536,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +510,510,510,510,510,510,510,510,510,510,1533,510,510,510,510,510, +510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,1533, +510,510,1533,1533,1533,1533,1533,1536,1533,1533,1533,510,1530,1530,1530,1530, +510,510,510,510,510,510,510,510,510,1530,1530,1530,1530,1530,1530,1530, +1537,1537,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1528,1528,1528,1528,1528,1528,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, /* block 281 */ -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,1407,1407,1407,1407,1407,1409,1409,1409,1409,1409,1409, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1409,1409,1409,1409, -1408,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, 
+1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, /* block 282 */ -724,724,724,724,724,724,724,724,724,724,724,724,1409,1409,1409,1409, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,1409,1409,1409,1409,1409,1409,1409,1409, -724,724,724,724,724,724,724,724,724,724,1409,1409,1409,1409,1409,1409, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,785,1528,1528,785,785,785,785,785,785,785,785,785,1529,1529,1529, +1529,1529,1529,1529,1529,1529,785,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,785,1529,1529, /* block 283 */ -724,724,724,724,724,724,724,724,1409,1409,1409,1409,1409,1409,1409,1409, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,1409,1409, -1407,1407,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, 
+1529,1529,1529,1529,1529,1538,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1528,1528,785,785,1528,785,785,785,1528,1528,785,785, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1538,1538,1538,1529,1529,1538,1529,1529,1538,1539,1539,785,785,1529, +1529,1529,1529,1529,785,785,785,785,785,785,785,785,785,785,785,785, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1528,1528,785,1529,785,1528,785,1529,1529,1529,1540,1540,1540,1540,1540, /* block 284 */ -724,724,724,724,724,724,724,724,724,724,724,724,1417,1408,1408,1417, -1408,1408,1408,1408,1408,1408,1408,1408,1417,1417,1417,1417,1417,1417,1417,1417, -1408,1408,1408,1408,1408,1408,1417,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1417,1417,1417,1417,1417,1417,1417,1417,1417,1417,1408,724,1417,1417,1417,1408, -1408,1408,1408,1408,1408,1408,724,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1417,1408,1408,1408,1408,1408,1408,1408,1408, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,785, +1529,785,1538,1538,1529,1529,1538,1538,1538,1538,1538,1538,1538,1538,1538,1538, +1538,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1538,1538,1538,1538,1538,1538,1538,1538,1538,1538, +1538,1538,1538,1538,1538,1538,1538,1538,1538,1529,1529,1529,1538,1529,1529,1529, /* block 285 */ 
-1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1420,1420,1420,1420,1408,1417,1417,1408,1417,1417,1408,1417,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1417,1417,1417, -1408,1417,1417,1417,1417,1417,1417,1417,1417,1417,1417,1417,1417,1417,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, +1529,1538,1538,1538,1529,1538,1538,1538,1529,1529,1529,1529,1529,1529,1529,1538, +1529,1538,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1538,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,785,1528,1529, /* block 286 */ -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407, -1407,1407,1407,1407,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1407,1409,1409, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1409,1409,1409, 
+1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,781,781, +781,781,781,781,781,781,1528,1528,1528,785,785,1529,1529,1529,1529,1528, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1528,1528,1528,1528,1528,1528,1528,785, +785,1528,1528,785,1539,1539,785,785,785,785,1538,1528,1528,1528,1528,1528, /* block 287 */ -1408,1408,1408,1408,1408,1408,1408,1408,1408,1409,1409,1409,1409,1409,1409,1409, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1409,1408, -1408,1408,1408,1417,1417,1417,1409,1409,1409,1409,1409,1409,1409,1409,1408,1408, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1408,1409,1409,1409,1409, -1408,1408,1408,1408,1408,1408,1408,1408,1408,1409,1409,1409,1409,1409,1409,1409, -1417,1417,1417,1417,1417,1417,1417,1417,1417,1409,1409,1409,1409,1409,1409,1409, +1528,1528,1528,1528,1528,1528,1528,785,1528,1528,785,785,785,785,1528,1528, +1539,1528,1528,1528,1528,1538,1538,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1529,785,1528,1528,785,1528,1528,1528,1528,1528,1528,1528, +1528,785,785,1528,1528,1528,1528,1528,1528,1528,1528,1528,785,1528,1528,1528, +1528,1528,785,785,785,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,785,785,785,1528,1528,1528,1528,1528,1528,1528,1528,785,785,785,1528, +1528,785,1528,785,1528,1528,1528,1528,785,1528,1528,1528,1528,1528,1528,785, +1528,1528,1528,785,1528,1528,1528,1528,1528,1528,785,1529,1529,1529,1529,1529, /* block 288 */ 
-724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1538,1538,1538,1529,1529,1529,1538,1538,1538,1538,1538, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, /* block 289 */ -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,163,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,724,724,724,724,724, -724,724,724,724,724,724,724,724,724,724,724,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -1421,1421,1421,1421,1421,1421,1421,1421,1421,1421,163,163,163,163,163,163, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1538,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, 
+1529,1529,1529,1529,1538,1538,1538,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1538,1529,1529,1529,1529,1529,1528,1528,1528,1528,1528,785,1538,785,785,785, +1529,1529,1529,1528,1528,1529,1529,1529,1530,1530,1530,1530,1529,1529,1529,1529, +785,785,785,785,785,785,1528,1528,1528,785,1528,1529,1529,1530,1530,1530, +785,1528,1528,785,1529,1529,1529,1529,1529,1529,1529,1529,1529,1530,1530,1530, /* block 290 */ -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409, -1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,1409,957,957, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,1528,1528,1528,1530,1530,1530,1530,1528,1528,1528,1528,1528, /* block 291 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, 
-839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,1528,1528,1528,1528,1528,1530,1530,1530,1530,1530,1530, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1530,1530,1530,1530, +1529,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, /* block 292 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,163,163,163,163,163,163, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, +781,781,781,781,781,781,781,781,781,781,781,781,1530,1530,1530,1530, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,1530,1530,1530,1530,1530,1530,1530,1530, +781,781,781,781,781,781,781,781,781,781,1530,1530,1530,1530,1530,1530, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, /* block 293 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, 
-839,839,839,839,839,839,839,839,839,839,839,839,839,839,163,163, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, +781,781,781,781,781,781,781,781,1530,1530,1530,1530,1530,1530,1530,1530, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,1530,1530, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1530,1530,1530,1530, +1528,1528,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, /* block 294 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, +781,781,781,781,781,781,781,781,781,781,781,781,1538,1529,1529,1538, +1529,1529,1529,1529,1529,1529,1529,1529,1538,1538,1538,1538,1538,1538,1538,1538, +1529,1529,1529,1529,1529,1529,1538,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1538,1538,1538,1538,1538,1538,1538,1538,1538,1538,1529,781,1538,1538,1538,1529, 
+1529,1529,1529,1529,1529,1529,781,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1538,1529,1529,1529,1529,1529,1529,1529,1529, /* block 295 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1541,1541,1541,1541,1529,1538,1538,1529,1538,1538,1529,1538,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1538,1538,1538, +1529,1538,1538,1538,1538,1538,1538,1538,1538,1538,1538,1538,1538,1538,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, /* block 296 */ -952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952, -952,952,952,952,952,952,952,952,952,952,952,952,952,952,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, 
-163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528, +1528,1528,1528,1528,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1528,1530,1530, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1530,1530,1530, /* block 297 */ -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,957,957, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1530,1530,1530,1530,1530,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529, +1529,1529,1529,1538,1538,1538,1529,1530,1530,1530,1530,1530,1530,1530,1529,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1530,1530,1529, +1529,1529,1529,1529,1529,1529,1529,1529,1529,1529,1530,1530,1530,1530,1530,1530, +1538,1538,1538,1538,1538,1538,1538,1538,1538,1530,1530,1530,1530,1530,1530,1530, /* block 
298 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,163,163,163,163,163, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,196,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +781,781,781,781,781,781,781,781,781,781,781,781,781,781,781,781, +1479,1479,1479,1479,1479,1479,1479,1479,1479,1479,196,196,196,196,196,196, /* block 299 */ -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -839,839,839,839,839,839,839,839,839,839,839,839,839,839,839,839, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, -163,163,163,163,163,163,163,163,163,163,163,163,163,163,163,163, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, 
+1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530, +1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1530,1032,1032, /* block 300 */ -708,713,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, -1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, -1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, -1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, -1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, -1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422,1422, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, /* block 301 */ -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, 
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,196,196,196,196,196,196, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, /* block 302 */ -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,196,196, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, /* block 303 */ -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, 
-960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -960,960,960,960,960,960,960,960,960,960,960,960,960,960,960,960, -708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, /* block 304 */ -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,951,951, -951,951,951,951,951,951,951,951,951,951,951,951,951,951,957,957, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, + +/* block 305 */ +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, 
+910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, + +/* block 306 */ +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025, +1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,1025,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, + +/* block 307 */ +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,1032,1032, + +/* block 308 */ +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,196,196,196,196,196, 
+910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, + +/* block 309 */ +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +910,910,910,910,910,910,910,910,910,910,910,910,910,910,910,910, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, +196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196, + +/* block 310 */ +765,770,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542, +1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542, +1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542, +1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542, +1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542, +1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542,1542, + +/* block 311 */ +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, + +/* blockblock 313 */ 
+1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035, +1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035, +1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035, +1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035, +1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035, +1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035, +1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035,1035, +765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765, + +/* block}; #if UCD_BLOCK_SIZE != 128 diff --git a/src/pcre2_ucp.h b/src/pcre2_ucp.h index 9ccc829..513c54e 100644 --- a/src/pcre2_ucp.h +++ b/src/pcre2_ucp.h @@ -132,13 +132,18 @@ enum { ucp_Hex_Digit, ucp_IDS_Binary_Operator, ucp_IDS_Trinary_Operator, + ucp_IDS_Unary_Operator, + ucp_ID_Compat_Math_Continue, + ucp_ID_Compat_Math_Start, ucp_ID_Continue, ucp_ID_Start, ucp_Ideographic, + ucp_InCB, ucp_Join_Control, ucp_Logical_Order_Exception, ucp_Lowercase, ucp_Math, + ucp_Modifier_Combining_Mark, ucp_Noncharacter_Code_Point, ucp_Pattern_Syntax, ucp_Pattern_White_Space, @@ -219,6 +224,8 @@ enum { ucp_Latin, ucp_Greek, ucp_Cyrillic, + ucp_Armenian, + ucp_Hebrew, ucp_Arabic, ucp_Syriac, ucp_Thaana, @@ -232,15 +239,21 @@ enum { ucp_Kannada, ucp_Malayalam, ucp_Sinhala, + ucp_Thai, + ucp_Tibetan, ucp_Myanmar, ucp_Georgian, ucp_Hangul, + ucp_Ethiopic, + ucp_Cherokee, + ucp_Runic, ucp_Mongolian, ucp_Hiragana, ucp_Katakana, ucp_Bopomofo, ucp_Han, ucp_Yi, + ucp_Gothic, ucp_Tagalog, ucp_Hanunoo, ucp_Buhid, @@ -248,21 +261,33 @@ enum { ucp_Limbu, ucp_Tai_Le, ucp_Linear_B, + ucp_Shavian, ucp_Cypriot, ucp_Buginese, ucp_Coptic, ucp_Glagolitic, + ucp_Tifinagh, ucp_Syloti_Nagri, ucp_Phags_Pa, ucp_Nko, ucp_Kayah_Li, + ucp_Lycian, + ucp_Carian, + ucp_Lydian, + ucp_Avestan, + ucp_Samaritan, + ucp_Lisu, ucp_Javanese, + ucp_Old_Turkic, ucp_Kaithi, 
ucp_Mandaic, ucp_Chakma, + ucp_Meroitic_Hieroglyphs, ucp_Sharada, ucp_Takri, + ucp_Caucasian_Albanian, ucp_Duployan, + ucp_Elbasan, ucp_Grantha, ucp_Khojki, ucp_Linear_A, @@ -274,7 +299,10 @@ enum { ucp_Khudawadi, ucp_Tirhuta, ucp_Multani, + ucp_Old_Hungarian, ucp_Adlam, + ucp_Osage, + ucp_Tangut, ucp_Masaram_Gondi, ucp_Dogra, ucp_Gunjala_Gondi, @@ -284,31 +312,28 @@ enum { ucp_Yezidi, ucp_Cypro_Minoan, ucp_Old_Uyghur, + ucp_Toto, + ucp_Garay, + ucp_Gurung_Khema, + ucp_Ol_Onal, + ucp_Sunuwar, + ucp_Todhri, + ucp_Tulu_Tigalari, /* Scripts which has no characters in other scripts. */ ucp_Unknown, ucp_Common, - ucp_Armenian, - ucp_Hebrew, - ucp_Thai, ucp_Lao, - ucp_Tibetan, - ucp_Ethiopic, - ucp_Cherokee, ucp_Canadian_Aboriginal, ucp_Ogham, - ucp_Runic, ucp_Khmer, ucp_Old_Italic, - ucp_Gothic, ucp_Deseret, ucp_Inherited, ucp_Ugaritic, - ucp_Shavian, ucp_Osmanya, ucp_Braille, ucp_New_Tai_Lue, - ucp_Tifinagh, ucp_Old_Persian, ucp_Kharoshthi, ucp_Balinese, @@ -320,32 +345,22 @@ enum { ucp_Vai, ucp_Saurashtra, ucp_Rejang, - ucp_Lycian, - ucp_Carian, - ucp_Lydian, ucp_Cham, ucp_Tai_Tham, ucp_Tai_Viet, - ucp_Avestan, ucp_Egyptian_Hieroglyphs, - ucp_Samaritan, - ucp_Lisu, ucp_Bamum, ucp_Meetei_Mayek, ucp_Imperial_Aramaic, ucp_Old_South_Arabian, ucp_Inscriptional_Parthian, ucp_Inscriptional_Pahlavi, - ucp_Old_Turkic, ucp_Batak, ucp_Brahmi, ucp_Meroitic_Cursive, - ucp_Meroitic_Hieroglyphs, ucp_Miao, ucp_Sora_Sompeng, - ucp_Caucasian_Albanian, ucp_Bassa_Vah, - ucp_Elbasan, ucp_Pahawh_Hmong, ucp_Mende_Kikakui, ucp_Mro, @@ -358,13 +373,10 @@ enum { ucp_Ahom, ucp_Anatolian_Hieroglyphs, ucp_Hatran, - ucp_Old_Hungarian, ucp_SignWriting, ucp_Bhaiksuki, ucp_Marchen, ucp_Newa, - ucp_Osage, - ucp_Tangut, ucp_Nushu, ucp_Soyombo, ucp_Zanabazar_Square, @@ -378,10 +390,10 @@ enum { ucp_Dives_Akuru, ucp_Khitan_Small_Script, ucp_Tangsa, - ucp_Toto, ucp_Vithkuqi, ucp_Kawi, ucp_Nag_Mundari, + ucp_Kirat_Rai, /* This must be last */ ucp_Script_Count @@ -389,7 +401,7 @@ enum { /* Size of entries 
in ucd_script_sets[] */ -#define ucd_script_sets_item_size 3 +#define ucd_script_sets_item_size 4 #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ diff --git a/src/pcre2_ucptables.c b/src/pcre2_ucptables.c index 2110014..d2b3403 100644 --- a/src/pcre2_ucptables.c +++ b/src/pcre2_ucptables.c @@ -199,6 +199,8 @@ the "loose matching" rules that Unicode advises and Perl uses. */ #define STRING_extendedpictographic0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_d STR_p STR_i STR_c STR_t STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0" #define STRING_extender0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_r "\0" #define STRING_extpict0 STR_e STR_x STR_t STR_p STR_i STR_c STR_t "\0" +#define STRING_gara0 STR_g STR_a STR_r STR_a "\0" +#define STRING_garay0 STR_g STR_a STR_r STR_a STR_y "\0" #define STRING_geor0 STR_g STR_e STR_o STR_r "\0" #define STRING_georgian0 STR_g STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0" #define STRING_glag0 STR_g STR_l STR_a STR_g "\0" @@ -219,9 +221,11 @@ the "loose matching" rules that Unicode advises and Perl uses. */ #define STRING_grlink0 STR_g STR_r STR_l STR_i STR_n STR_k "\0" #define STRING_gujarati0 STR_g STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0" #define STRING_gujr0 STR_g STR_u STR_j STR_r "\0" +#define STRING_gukh0 STR_g STR_u STR_k STR_h "\0" #define STRING_gunjalagondi0 STR_g STR_u STR_n STR_j STR_a STR_l STR_a STR_g STR_o STR_n STR_d STR_i "\0" #define STRING_gurmukhi0 STR_g STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0" #define STRING_guru0 STR_g STR_u STR_r STR_u "\0" +#define STRING_gurungkhema0 STR_g STR_u STR_r STR_u STR_n STR_g STR_k STR_h STR_e STR_m STR_a "\0" #define STRING_han0 STR_h STR_a STR_n "\0" #define STRING_hang0 STR_h STR_a STR_n STR_g "\0" #define STRING_hangul0 STR_h STR_a STR_n STR_g STR_u STR_l "\0" @@ -242,6 +246,8 @@ the "loose matching" rules that Unicode advises and Perl uses. 
*/ #define STRING_hmnp0 STR_h STR_m STR_n STR_p "\0" #define STRING_hung0 STR_h STR_u STR_n STR_g "\0" #define STRING_idc0 STR_i STR_d STR_c "\0" +#define STRING_idcompatmathcontinue0 STR_i STR_d STR_c STR_o STR_m STR_p STR_a STR_t STR_m STR_a STR_t STR_h STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0" +#define STRING_idcompatmathstart0 STR_i STR_d STR_c STR_o STR_m STR_p STR_a STR_t STR_m STR_a STR_t STR_h STR_s STR_t STR_a STR_r STR_t "\0" #define STRING_idcontinue0 STR_i STR_d STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0" #define STRING_ideo0 STR_i STR_d STR_e STR_o "\0" #define STRING_ideographic0 STR_i STR_d STR_e STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0" @@ -251,7 +257,10 @@ the "loose matching" rules that Unicode advises and Perl uses. */ #define STRING_idst0 STR_i STR_d STR_s STR_t "\0" #define STRING_idstart0 STR_i STR_d STR_s STR_t STR_a STR_r STR_t "\0" #define STRING_idstrinaryoperator0 STR_i STR_d STR_s STR_t STR_r STR_i STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0" +#define STRING_idsu0 STR_i STR_d STR_s STR_u "\0" +#define STRING_idsunaryoperator0 STR_i STR_d STR_s STR_u STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0" #define STRING_imperialaramaic0 STR_i STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_a STR_r STR_a STR_m STR_a STR_i STR_c "\0" +#define STRING_incb0 STR_i STR_n STR_c STR_b "\0" #define STRING_inherited0 STR_i STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0" #define STRING_inscriptionalpahlavi0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0" #define STRING_inscriptionalparthian0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0" @@ -275,8 +284,10 @@ the "loose matching" rules that Unicode advises and Perl uses. 
*/ #define STRING_khoj0 STR_k STR_h STR_o STR_j "\0" #define STRING_khojki0 STR_k STR_h STR_o STR_j STR_k STR_i "\0" #define STRING_khudawadi0 STR_k STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0" +#define STRING_kiratrai0 STR_k STR_i STR_r STR_a STR_t STR_r STR_a STR_i "\0" #define STRING_kits0 STR_k STR_i STR_t STR_s "\0" #define STRING_knda0 STR_k STR_n STR_d STR_a "\0" +#define STRING_krai0 STR_k STR_r STR_a STR_i "\0" #define STRING_kthi0 STR_k STR_t STR_h STR_i "\0" #define STRING_l0 STR_l "\0" #define STRING_l_AMPERSAND0 STR_l STR_AMPERSAND "\0" @@ -323,6 +334,7 @@ the "loose matching" rules that Unicode advises and Perl uses. */ #define STRING_masaramgondi0 STR_m STR_a STR_s STR_a STR_r STR_a STR_m STR_g STR_o STR_n STR_d STR_i "\0" #define STRING_math0 STR_m STR_a STR_t STR_h "\0" #define STRING_mc0 STR_m STR_c "\0" +#define STRING_mcm0 STR_m STR_c STR_m "\0" #define STRING_me0 STR_m STR_e "\0" #define STRING_medefaidrin0 STR_m STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0" #define STRING_medf0 STR_m STR_e STR_d STR_f "\0" @@ -337,6 +349,7 @@ the "loose matching" rules that Unicode advises and Perl uses. */ #define STRING_mlym0 STR_m STR_l STR_y STR_m "\0" #define STRING_mn0 STR_m STR_n "\0" #define STRING_modi0 STR_m STR_o STR_d STR_i "\0" +#define STRING_modifiercombiningmark0 STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r STR_c STR_o STR_m STR_b STR_i STR_n STR_i STR_n STR_g STR_m STR_a STR_r STR_k "\0" #define STRING_mong0 STR_m STR_o STR_n STR_g "\0" #define STRING_mongolian0 STR_m STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0" #define STRING_mro0 STR_m STR_r STR_o "\0" @@ -379,6 +392,8 @@ the "loose matching" rules that Unicode advises and Perl uses. 
*/ #define STRING_oldsoutharabian0 STR_o STR_l STR_d STR_s STR_o STR_u STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0" #define STRING_oldturkic0 STR_o STR_l STR_d STR_t STR_u STR_r STR_k STR_i STR_c "\0" #define STRING_olduyghur0 STR_o STR_l STR_d STR_u STR_y STR_g STR_h STR_u STR_r "\0" +#define STRING_olonal0 STR_o STR_l STR_o STR_n STR_a STR_l "\0" +#define STRING_onao0 STR_o STR_n STR_a STR_o "\0" #define STRING_oriya0 STR_o STR_r STR_i STR_y STR_a "\0" #define STRING_orkh0 STR_o STR_r STR_k STR_h "\0" #define STRING_orya0 STR_o STR_r STR_y STR_a "\0" @@ -463,6 +478,8 @@ the "loose matching" rules that Unicode advises and Perl uses. */ #define STRING_sterm0 STR_s STR_t STR_e STR_r STR_m "\0" #define STRING_sund0 STR_s STR_u STR_n STR_d "\0" #define STRING_sundanese0 STR_s STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0" +#define STRING_sunu0 STR_s STR_u STR_n STR_u "\0" +#define STRING_sunuwar0 STR_s STR_u STR_n STR_u STR_w STR_a STR_r "\0" #define STRING_sylo0 STR_s STR_y STR_l STR_o "\0" #define STRING_sylotinagri0 STR_s STR_y STR_l STR_o STR_t STR_i STR_n STR_a STR_g STR_r STR_i "\0" #define STRING_syrc0 STR_s STR_y STR_r STR_c "\0" @@ -498,7 +515,11 @@ the "loose matching" rules that Unicode advises and Perl uses. 
*/ #define STRING_tirh0 STR_t STR_i STR_r STR_h "\0" #define STRING_tirhuta0 STR_t STR_i STR_r STR_h STR_u STR_t STR_a "\0" #define STRING_tnsa0 STR_t STR_n STR_s STR_a "\0" +#define STRING_todhri0 STR_t STR_o STR_d STR_h STR_r STR_i "\0" +#define STRING_todr0 STR_t STR_o STR_d STR_r "\0" #define STRING_toto0 STR_t STR_o STR_t STR_o "\0" +#define STRING_tulutigalari0 STR_t STR_u STR_l STR_u STR_t STR_i STR_g STR_a STR_l STR_a STR_r STR_i "\0" +#define STRING_tutg0 STR_t STR_u STR_t STR_g "\0" #define STRING_ugar0 STR_u STR_g STR_a STR_r "\0" #define STRING_ugaritic0 STR_u STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0" #define STRING_uideo0 STR_u STR_i STR_d STR_e STR_o "\0" @@ -690,6 +711,8 @@ const char PRIV(utt_names)[] = STRING_extendedpictographic0 STRING_extender0 STRING_extpict0 + STRING_gara0 + STRING_garay0 STRING_geor0 STRING_georgian0 STRING_glag0 @@ -710,9 +733,11 @@ const char PRIV(utt_names)[] = STRING_grlink0 STRING_gujarati0 STRING_gujr0 + STRING_gukh0 STRING_gunjalagondi0 STRING_gurmukhi0 STRING_guru0 + STRING_gurungkhema0 STRING_han0 STRING_hang0 STRING_hangul0 @@ -733,6 +758,8 @@ const char PRIV(utt_names)[] = STRING_hmnp0 STRING_hung0 STRING_idc0 + STRING_idcompatmathcontinue0 + STRING_idcompatmathstart0 STRING_idcontinue0 STRING_ideo0 STRING_ideographic0 @@ -742,7 +769,10 @@ const char PRIV(utt_names)[] = STRING_idst0 STRING_idstart0 STRING_idstrinaryoperator0 + STRING_idsu0 + STRING_idsunaryoperator0 STRING_imperialaramaic0 + STRING_incb0 STRING_inherited0 STRING_inscriptionalpahlavi0 STRING_inscriptionalparthian0 @@ -766,8 +796,10 @@ const char PRIV(utt_names)[] = STRING_khoj0 STRING_khojki0 STRING_khudawadi0 + STRING_kiratrai0 STRING_kits0 STRING_knda0 + STRING_krai0 STRING_kthi0 STRING_l0 STRING_l_AMPERSAND0 @@ -814,6 +846,7 @@ const char PRIV(utt_names)[] = STRING_masaramgondi0 STRING_math0 STRING_mc0 + STRING_mcm0 STRING_me0 STRING_medefaidrin0 STRING_medf0 @@ -828,6 +861,7 @@ const char PRIV(utt_names)[] = STRING_mlym0 STRING_mn0 
STRING_modi0 + STRING_modifiercombiningmark0 STRING_mong0 STRING_mongolian0 STRING_mro0 @@ -870,6 +904,8 @@ const char PRIV(utt_names)[] = STRING_oldsoutharabian0 STRING_oldturkic0 STRING_olduyghur0 + STRING_olonal0 + STRING_onao0 STRING_oriya0 STRING_orkh0 STRING_orya0 @@ -954,6 +990,8 @@ const char PRIV(utt_names)[] = STRING_sterm0 STRING_sund0 STRING_sundanese0 + STRING_sunu0 + STRING_sunuwar0 STRING_sylo0 STRING_sylotinagri0 STRING_syrc0 @@ -989,7 +1027,11 @@ const char PRIV(utt_names)[] = STRING_tirh0 STRING_tirhuta0 STRING_tnsa0 + STRING_todhri0 + STRING_todr0 STRING_toto0 + STRING_tulutigalari0 + STRING_tutg0 STRING_ugar0 STRING_ugaritic0 STRING_uideo0 @@ -1037,7 +1079,7 @@ const char PRIV(utt_names)[] = const ucp_type_table PRIV(utt)[] = { { 0, PT_SCX, ucp_Adlam }, { 6, PT_SCX, ucp_Adlam }, - { 11, PT_SC, ucp_Caucasian_Albanian }, + { 11, PT_SCX, ucp_Caucasian_Albanian }, { 16, PT_BOOL, ucp_ASCII_Hex_Digit }, { 21, PT_SC, ucp_Ahom }, { 26, PT_BOOL, ucp_Alphabetic }, @@ -1046,13 +1088,13 @@ const ucp_type_table PRIV(utt)[] = { { 64, PT_ANY, 0 }, { 68, PT_SCX, ucp_Arabic }, { 73, PT_SCX, ucp_Arabic }, - { 80, PT_SC, ucp_Armenian }, + { 80, PT_SCX, ucp_Armenian }, { 89, PT_SC, ucp_Imperial_Aramaic }, - { 94, PT_SC, ucp_Armenian }, + { 94, PT_SCX, ucp_Armenian }, { 99, PT_BOOL, ucp_ASCII }, { 105, PT_BOOL, ucp_ASCII_Hex_Digit }, - { 119, PT_SC, ucp_Avestan }, - { 127, PT_SC, ucp_Avestan }, + { 119, PT_SCX, ucp_Avestan }, + { 127, PT_SCX, ucp_Avestan }, { 132, PT_SC, ucp_Balinese }, { 137, PT_SC, ucp_Balinese }, { 146, PT_SC, ucp_Bamum }, @@ -1106,11 +1148,11 @@ const ucp_type_table PRIV(utt)[] = { { 480, PT_SCX, ucp_Chakma }, { 485, PT_SC, ucp_Canadian_Aboriginal }, { 504, PT_SC, ucp_Canadian_Aboriginal }, - { 509, PT_SC, ucp_Carian }, - { 514, PT_SC, ucp_Carian }, + { 509, PT_SCX, ucp_Carian }, + { 514, PT_SCX, ucp_Carian }, { 521, PT_BOOL, ucp_Cased }, { 527, PT_BOOL, ucp_Case_Ignorable }, - { 541, PT_SC, ucp_Caucasian_Albanian }, + { 541, PT_SCX, 
ucp_Caucasian_Albanian }, { 559, PT_PC, ucp_Cc }, { 562, PT_PC, ucp_Cf }, { 565, PT_SCX, ucp_Chakma }, @@ -1120,8 +1162,8 @@ const ucp_type_table PRIV(utt)[] = { { 621, PT_BOOL, ucp_Changes_When_Lowercased }, { 643, PT_BOOL, ucp_Changes_When_Titlecased }, { 665, PT_BOOL, ucp_Changes_When_Uppercased }, - { 687, PT_SC, ucp_Cherokee }, - { 692, PT_SC, ucp_Cherokee }, + { 687, PT_SCX, ucp_Cherokee }, + { 692, PT_SCX, ucp_Cherokee }, { 701, PT_SC, ucp_Chorasmian }, { 712, PT_SC, ucp_Chorasmian }, { 717, PT_BOOL, ucp_Case_Ignorable }, @@ -1164,8 +1206,8 @@ const ucp_type_table PRIV(utt)[] = { { 963, PT_BOOL, ucp_Emoji_Component }, { 969, PT_SC, ucp_Egyptian_Hieroglyphs }, { 974, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 994, PT_SC, ucp_Elbasan }, - { 999, PT_SC, ucp_Elbasan }, + { 994, PT_SCX, ucp_Elbasan }, + { 999, PT_SCX, ucp_Elbasan }, { 1007, PT_SC, ucp_Elymaic }, { 1012, PT_SC, ucp_Elymaic }, { 1020, PT_BOOL, ucp_Emoji_Modifier }, @@ -1175,355 +1217,376 @@ const ucp_type_table PRIV(utt)[] = { { 1060, PT_BOOL, ucp_Emoji_Modifier_Base }, { 1078, PT_BOOL, ucp_Emoji_Presentation }, { 1096, PT_BOOL, ucp_Emoji_Presentation }, - { 1102, PT_SC, ucp_Ethiopic }, - { 1107, PT_SC, ucp_Ethiopic }, + { 1102, PT_SCX, ucp_Ethiopic }, + { 1107, PT_SCX, ucp_Ethiopic }, { 1116, PT_BOOL, ucp_Extender }, { 1120, PT_BOOL, ucp_Extended_Pictographic }, { 1141, PT_BOOL, ucp_Extender }, { 1150, PT_BOOL, ucp_Extended_Pictographic }, - { 1158, PT_SCX, ucp_Georgian }, - { 1163, PT_SCX, ucp_Georgian }, - { 1172, PT_SCX, ucp_Glagolitic }, - { 1177, PT_SCX, ucp_Glagolitic }, - { 1188, PT_SCX, ucp_Gunjala_Gondi }, - { 1193, PT_SCX, ucp_Masaram_Gondi }, - { 1198, PT_SC, ucp_Gothic }, - { 1203, PT_SC, ucp_Gothic }, - { 1210, PT_SCX, ucp_Grantha }, - { 1215, PT_SCX, ucp_Grantha }, - { 1223, PT_BOOL, ucp_Grapheme_Base }, - { 1236, PT_BOOL, ucp_Grapheme_Extend }, - { 1251, PT_BOOL, ucp_Grapheme_Link }, - { 1264, PT_BOOL, ucp_Grapheme_Base }, - { 1271, PT_SCX, ucp_Greek }, - { 1277, PT_SCX, ucp_Greek }, - 
{ 1282, PT_BOOL, ucp_Grapheme_Extend }, - { 1288, PT_BOOL, ucp_Grapheme_Link }, - { 1295, PT_SCX, ucp_Gujarati }, - { 1304, PT_SCX, ucp_Gujarati }, - { 1309, PT_SCX, ucp_Gunjala_Gondi }, - { 1322, PT_SCX, ucp_Gurmukhi }, - { 1331, PT_SCX, ucp_Gurmukhi }, - { 1336, PT_SCX, ucp_Han }, - { 1340, PT_SCX, ucp_Hangul }, - { 1345, PT_SCX, ucp_Hangul }, - { 1352, PT_SCX, ucp_Han }, - { 1357, PT_SCX, ucp_Hanifi_Rohingya }, - { 1372, PT_SCX, ucp_Hanunoo }, - { 1377, PT_SCX, ucp_Hanunoo }, - { 1385, PT_SC, ucp_Hatran }, - { 1390, PT_SC, ucp_Hatran }, - { 1397, PT_SC, ucp_Hebrew }, - { 1402, PT_SC, ucp_Hebrew }, - { 1409, PT_BOOL, ucp_Hex_Digit }, - { 1413, PT_BOOL, ucp_Hex_Digit }, - { 1422, PT_SCX, ucp_Hiragana }, - { 1427, PT_SCX, ucp_Hiragana }, - { 1436, PT_SC, ucp_Anatolian_Hieroglyphs }, - { 1441, PT_SC, ucp_Pahawh_Hmong }, - { 1446, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, - { 1451, PT_SC, ucp_Old_Hungarian }, - { 1456, PT_BOOL, ucp_ID_Continue }, - { 1460, PT_BOOL, ucp_ID_Continue }, - { 1471, PT_BOOL, ucp_Ideographic }, - { 1476, PT_BOOL, ucp_Ideographic }, - { 1488, PT_BOOL, ucp_ID_Start }, - { 1492, PT_BOOL, ucp_IDS_Binary_Operator }, - { 1497, PT_BOOL, ucp_IDS_Binary_Operator }, - { 1515, PT_BOOL, ucp_IDS_Trinary_Operator }, - { 1520, PT_BOOL, ucp_ID_Start }, - { 1528, PT_BOOL, ucp_IDS_Trinary_Operator }, - { 1547, PT_SC, ucp_Imperial_Aramaic }, - { 1563, PT_SC, ucp_Inherited }, - { 1573, PT_SC, ucp_Inscriptional_Pahlavi }, - { 1594, PT_SC, ucp_Inscriptional_Parthian }, - { 1616, PT_SC, ucp_Old_Italic }, - { 1621, PT_SCX, ucp_Javanese }, - { 1626, PT_SCX, ucp_Javanese }, - { 1635, PT_BOOL, ucp_Join_Control }, - { 1641, PT_BOOL, ucp_Join_Control }, - { 1653, PT_SCX, ucp_Kaithi }, - { 1660, PT_SCX, ucp_Kayah_Li }, - { 1665, PT_SCX, ucp_Katakana }, - { 1670, PT_SCX, ucp_Kannada }, - { 1678, PT_SCX, ucp_Katakana }, - { 1687, PT_SC, ucp_Kawi }, - { 1692, PT_SCX, ucp_Kayah_Li }, - { 1700, PT_SC, ucp_Kharoshthi }, - { 1705, PT_SC, ucp_Kharoshthi }, - { 1716, PT_SC, 
ucp_Khitan_Small_Script }, - { 1734, PT_SC, ucp_Khmer }, - { 1740, PT_SC, ucp_Khmer }, - { 1745, PT_SCX, ucp_Khojki }, - { 1750, PT_SCX, ucp_Khojki }, - { 1757, PT_SCX, ucp_Khudawadi }, - { 1767, PT_SC, ucp_Khitan_Small_Script }, - { 1772, PT_SCX, ucp_Kannada }, - { 1777, PT_SCX, ucp_Kaithi }, - { 1782, PT_GC, ucp_L }, - { 1784, PT_LAMP, 0 }, - { 1787, PT_SC, ucp_Tai_Tham }, - { 1792, PT_SC, ucp_Lao }, - { 1796, PT_SC, ucp_Lao }, - { 1801, PT_SCX, ucp_Latin }, - { 1807, PT_SCX, ucp_Latin }, - { 1812, PT_LAMP, 0 }, - { 1815, PT_SC, ucp_Lepcha }, - { 1820, PT_SC, ucp_Lepcha }, - { 1827, PT_SCX, ucp_Limbu }, - { 1832, PT_SCX, ucp_Limbu }, - { 1838, PT_SCX, ucp_Linear_A }, - { 1843, PT_SCX, ucp_Linear_B }, - { 1848, PT_SCX, ucp_Linear_A }, - { 1856, PT_SCX, ucp_Linear_B }, - { 1864, PT_SC, ucp_Lisu }, - { 1869, PT_PC, ucp_Ll }, - { 1872, PT_PC, ucp_Lm }, - { 1875, PT_PC, ucp_Lo }, - { 1878, PT_BOOL, ucp_Logical_Order_Exception }, - { 1882, PT_BOOL, ucp_Logical_Order_Exception }, - { 1904, PT_BOOL, ucp_Lowercase }, - { 1910, PT_BOOL, ucp_Lowercase }, - { 1920, PT_PC, ucp_Lt }, - { 1923, PT_PC, ucp_Lu }, - { 1926, PT_SC, ucp_Lycian }, - { 1931, PT_SC, ucp_Lycian }, - { 1938, PT_SC, ucp_Lydian }, - { 1943, PT_SC, ucp_Lydian }, - { 1950, PT_GC, ucp_M }, - { 1952, PT_SCX, ucp_Mahajani }, - { 1961, PT_SCX, ucp_Mahajani }, - { 1966, PT_SC, ucp_Makasar }, - { 1971, PT_SC, ucp_Makasar }, - { 1979, PT_SCX, ucp_Malayalam }, - { 1989, PT_SCX, ucp_Mandaic }, - { 1994, PT_SCX, ucp_Mandaic }, - { 2002, PT_SCX, ucp_Manichaean }, - { 2007, PT_SCX, ucp_Manichaean }, - { 2018, PT_SC, ucp_Marchen }, - { 2023, PT_SC, ucp_Marchen }, - { 2031, PT_SCX, ucp_Masaram_Gondi }, - { 2044, PT_BOOL, ucp_Math }, - { 2049, PT_PC, ucp_Mc }, - { 2052, PT_PC, ucp_Me }, - { 2055, PT_SC, ucp_Medefaidrin }, - { 2067, PT_SC, ucp_Medefaidrin }, - { 2072, PT_SC, ucp_Meetei_Mayek }, - { 2084, PT_SC, ucp_Mende_Kikakui }, - { 2089, PT_SC, ucp_Mende_Kikakui }, - { 2102, PT_SC, ucp_Meroitic_Cursive }, - { 2107, 
PT_SC, ucp_Meroitic_Hieroglyphs }, - { 2112, PT_SC, ucp_Meroitic_Cursive }, - { 2128, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 2148, PT_SC, ucp_Miao }, - { 2153, PT_SCX, ucp_Malayalam }, - { 2158, PT_PC, ucp_Mn }, - { 2161, PT_SCX, ucp_Modi }, - { 2166, PT_SCX, ucp_Mongolian }, - { 2171, PT_SCX, ucp_Mongolian }, - { 2181, PT_SC, ucp_Mro }, - { 2185, PT_SC, ucp_Mro }, - { 2190, PT_SC, ucp_Meetei_Mayek }, - { 2195, PT_SCX, ucp_Multani }, - { 2200, PT_SCX, ucp_Multani }, - { 2208, PT_SCX, ucp_Myanmar }, - { 2216, PT_SCX, ucp_Myanmar }, - { 2221, PT_GC, ucp_N }, - { 2223, PT_SC, ucp_Nabataean }, - { 2233, PT_SC, ucp_Nag_Mundari }, - { 2238, PT_SC, ucp_Nag_Mundari }, - { 2249, PT_SCX, ucp_Nandinagari }, - { 2254, PT_SCX, ucp_Nandinagari }, - { 2266, PT_SC, ucp_Old_North_Arabian }, - { 2271, PT_SC, ucp_Nabataean }, - { 2276, PT_BOOL, ucp_Noncharacter_Code_Point }, - { 2282, PT_PC, ucp_Nd }, - { 2285, PT_SC, ucp_Newa }, - { 2290, PT_SC, ucp_New_Tai_Lue }, - { 2300, PT_SCX, ucp_Nko }, - { 2304, PT_SCX, ucp_Nko }, - { 2309, PT_PC, ucp_Nl }, - { 2312, PT_PC, ucp_No }, - { 2315, PT_BOOL, ucp_Noncharacter_Code_Point }, - { 2337, PT_SC, ucp_Nushu }, - { 2342, PT_SC, ucp_Nushu }, - { 2348, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, - { 2369, PT_SC, ucp_Ogham }, - { 2374, PT_SC, ucp_Ogham }, - { 2380, PT_SC, ucp_Ol_Chiki }, - { 2388, PT_SC, ucp_Ol_Chiki }, - { 2393, PT_SC, ucp_Old_Hungarian }, - { 2406, PT_SC, ucp_Old_Italic }, - { 2416, PT_SC, ucp_Old_North_Arabian }, - { 2432, PT_SCX, ucp_Old_Permic }, - { 2442, PT_SC, ucp_Old_Persian }, - { 2453, PT_SC, ucp_Old_Sogdian }, - { 2464, PT_SC, ucp_Old_South_Arabian }, - { 2480, PT_SC, ucp_Old_Turkic }, - { 2490, PT_SCX, ucp_Old_Uyghur }, - { 2500, PT_SCX, ucp_Oriya }, - { 2506, PT_SC, ucp_Old_Turkic }, - { 2511, PT_SCX, ucp_Oriya }, - { 2516, PT_SC, ucp_Osage }, - { 2522, PT_SC, ucp_Osage }, - { 2527, PT_SC, ucp_Osmanya }, - { 2532, PT_SC, ucp_Osmanya }, - { 2540, PT_SCX, ucp_Old_Uyghur }, - { 2545, PT_GC, ucp_P }, - { 2547, PT_SC, 
ucp_Pahawh_Hmong }, - { 2559, PT_SC, ucp_Palmyrene }, - { 2564, PT_SC, ucp_Palmyrene }, - { 2574, PT_BOOL, ucp_Pattern_Syntax }, - { 2581, PT_BOOL, ucp_Pattern_Syntax }, - { 2595, PT_BOOL, ucp_Pattern_White_Space }, - { 2613, PT_BOOL, ucp_Pattern_White_Space }, - { 2619, PT_SC, ucp_Pau_Cin_Hau }, - { 2624, PT_SC, ucp_Pau_Cin_Hau }, - { 2634, PT_PC, ucp_Pc }, - { 2637, PT_BOOL, ucp_Prepended_Concatenation_Mark }, - { 2641, PT_PC, ucp_Pd }, - { 2644, PT_PC, ucp_Pe }, - { 2647, PT_SCX, ucp_Old_Permic }, - { 2652, PT_PC, ucp_Pf }, - { 2655, PT_SCX, ucp_Phags_Pa }, - { 2660, PT_SCX, ucp_Phags_Pa }, - { 2668, PT_SC, ucp_Inscriptional_Pahlavi }, - { 2673, PT_SCX, ucp_Psalter_Pahlavi }, - { 2678, PT_SC, ucp_Phoenician }, - { 2683, PT_SC, ucp_Phoenician }, - { 2694, PT_PC, ucp_Pi }, - { 2697, PT_SC, ucp_Miao }, - { 2702, PT_PC, ucp_Po }, - { 2705, PT_BOOL, ucp_Prepended_Concatenation_Mark }, - { 2732, PT_SC, ucp_Inscriptional_Parthian }, - { 2737, PT_PC, ucp_Ps }, - { 2740, PT_SCX, ucp_Psalter_Pahlavi }, - { 2755, PT_SCX, ucp_Coptic }, - { 2760, PT_SC, ucp_Inherited }, - { 2765, PT_BOOL, ucp_Quotation_Mark }, - { 2771, PT_BOOL, ucp_Quotation_Mark }, - { 2785, PT_BOOL, ucp_Radical }, - { 2793, PT_BOOL, ucp_Regional_Indicator }, - { 2811, PT_SC, ucp_Rejang }, - { 2818, PT_BOOL, ucp_Regional_Indicator }, - { 2821, PT_SC, ucp_Rejang }, - { 2826, PT_SCX, ucp_Hanifi_Rohingya }, - { 2831, PT_SC, ucp_Runic }, - { 2837, PT_SC, ucp_Runic }, - { 2842, PT_GC, ucp_S }, - { 2844, PT_SC, ucp_Samaritan }, - { 2854, PT_SC, ucp_Samaritan }, - { 2859, PT_SC, ucp_Old_South_Arabian }, - { 2864, PT_SC, ucp_Saurashtra }, - { 2869, PT_SC, ucp_Saurashtra }, - { 2880, PT_PC, ucp_Sc }, - { 2883, PT_BOOL, ucp_Soft_Dotted }, - { 2886, PT_BOOL, ucp_Sentence_Terminal }, - { 2903, PT_SC, ucp_SignWriting }, - { 2908, PT_SCX, ucp_Sharada }, - { 2916, PT_SC, ucp_Shavian }, - { 2924, PT_SC, ucp_Shavian }, - { 2929, PT_SCX, ucp_Sharada }, - { 2934, PT_SC, ucp_Siddham }, - { 2939, PT_SC, ucp_Siddham }, - { 
2947, PT_SC, ucp_SignWriting }, - { 2959, PT_SCX, ucp_Khudawadi }, - { 2964, PT_SCX, ucp_Sinhala }, - { 2969, PT_SCX, ucp_Sinhala }, - { 2977, PT_PC, ucp_Sk }, - { 2980, PT_PC, ucp_Sm }, - { 2983, PT_PC, ucp_So }, - { 2986, PT_BOOL, ucp_Soft_Dotted }, - { 2997, PT_SCX, ucp_Sogdian }, - { 3002, PT_SCX, ucp_Sogdian }, - { 3010, PT_SC, ucp_Old_Sogdian }, - { 3015, PT_SC, ucp_Sora_Sompeng }, - { 3020, PT_SC, ucp_Sora_Sompeng }, - { 3032, PT_SC, ucp_Soyombo }, - { 3037, PT_SC, ucp_Soyombo }, - { 3045, PT_BOOL, ucp_White_Space }, - { 3051, PT_BOOL, ucp_Sentence_Terminal }, - { 3057, PT_SC, ucp_Sundanese }, - { 3062, PT_SC, ucp_Sundanese }, - { 3072, PT_SCX, ucp_Syloti_Nagri }, - { 3077, PT_SCX, ucp_Syloti_Nagri }, - { 3089, PT_SCX, ucp_Syriac }, - { 3094, PT_SCX, ucp_Syriac }, - { 3101, PT_SCX, ucp_Tagalog }, - { 3109, PT_SCX, ucp_Tagbanwa }, - { 3114, PT_SCX, ucp_Tagbanwa }, - { 3123, PT_SCX, ucp_Tai_Le }, - { 3129, PT_SC, ucp_Tai_Tham }, - { 3137, PT_SC, ucp_Tai_Viet }, - { 3145, PT_SCX, ucp_Takri }, - { 3150, PT_SCX, ucp_Takri }, - { 3156, PT_SCX, ucp_Tai_Le }, - { 3161, PT_SC, ucp_New_Tai_Lue }, - { 3166, PT_SCX, ucp_Tamil }, - { 3172, PT_SCX, ucp_Tamil }, - { 3177, PT_SC, ucp_Tangut }, - { 3182, PT_SC, ucp_Tangsa }, - { 3189, PT_SC, ucp_Tangut }, - { 3196, PT_SC, ucp_Tai_Viet }, - { 3201, PT_SCX, ucp_Telugu }, - { 3206, PT_SCX, ucp_Telugu }, - { 3213, PT_BOOL, ucp_Terminal_Punctuation }, - { 3218, PT_BOOL, ucp_Terminal_Punctuation }, - { 3238, PT_SC, ucp_Tifinagh }, - { 3243, PT_SCX, ucp_Tagalog }, - { 3248, PT_SCX, ucp_Thaana }, - { 3253, PT_SCX, ucp_Thaana }, - { 3260, PT_SC, ucp_Thai }, - { 3265, PT_SC, ucp_Tibetan }, - { 3273, PT_SC, ucp_Tibetan }, - { 3278, PT_SC, ucp_Tifinagh }, - { 3287, PT_SCX, ucp_Tirhuta }, - { 3292, PT_SCX, ucp_Tirhuta }, - { 3300, PT_SC, ucp_Tangsa }, - { 3305, PT_SC, ucp_Toto }, - { 3310, PT_SC, ucp_Ugaritic }, - { 3315, PT_SC, ucp_Ugaritic }, - { 3324, PT_BOOL, ucp_Unified_Ideograph }, - { 3330, PT_BOOL, ucp_Unified_Ideograph }, - { 
3347, PT_SC, ucp_Unknown }, - { 3355, PT_BOOL, ucp_Uppercase }, - { 3361, PT_BOOL, ucp_Uppercase }, - { 3371, PT_SC, ucp_Vai }, - { 3375, PT_SC, ucp_Vai }, - { 3380, PT_BOOL, ucp_Variation_Selector }, - { 3398, PT_SC, ucp_Vithkuqi }, - { 3403, PT_SC, ucp_Vithkuqi }, - { 3412, PT_BOOL, ucp_Variation_Selector }, - { 3415, PT_SC, ucp_Wancho }, - { 3422, PT_SC, ucp_Warang_Citi }, - { 3427, PT_SC, ucp_Warang_Citi }, - { 3438, PT_SC, ucp_Wancho }, - { 3443, PT_BOOL, ucp_White_Space }, - { 3454, PT_BOOL, ucp_White_Space }, - { 3461, PT_ALNUM, 0 }, - { 3465, PT_BOOL, ucp_XID_Continue }, - { 3470, PT_BOOL, ucp_XID_Continue }, - { 3482, PT_BOOL, ucp_XID_Start }, - { 3487, PT_BOOL, ucp_XID_Start }, - { 3496, PT_SC, ucp_Old_Persian }, - { 3501, PT_PXSPACE, 0 }, - { 3505, PT_SPACE, 0 }, - { 3509, PT_SC, ucp_Cuneiform }, - { 3514, PT_UCNC, 0 }, - { 3518, PT_WORD, 0 }, - { 3522, PT_SCX, ucp_Yezidi }, - { 3527, PT_SCX, ucp_Yezidi }, - { 3534, PT_SCX, ucp_Yi }, - { 3537, PT_SCX, ucp_Yi }, - { 3542, PT_GC, ucp_Z }, - { 3544, PT_SC, ucp_Zanabazar_Square }, - { 3560, PT_SC, ucp_Zanabazar_Square }, - { 3565, PT_SC, ucp_Inherited }, - { 3570, PT_PC, ucp_Zl }, - { 3573, PT_PC, ucp_Zp }, - { 3576, PT_PC, ucp_Zs }, - { 3579, PT_SC, ucp_Common }, - { 3584, PT_SC, ucp_Unknown } + { 1158, PT_SCX, ucp_Garay }, + { 1163, PT_SCX, ucp_Garay }, + { 1169, PT_SCX, ucp_Georgian }, + { 1174, PT_SCX, ucp_Georgian }, + { 1183, PT_SCX, ucp_Glagolitic }, + { 1188, PT_SCX, ucp_Glagolitic }, + { 1199, PT_SCX, ucp_Gunjala_Gondi }, + { 1204, PT_SCX, ucp_Masaram_Gondi }, + { 1209, PT_SCX, ucp_Gothic }, + { 1214, PT_SCX, ucp_Gothic }, + { 1221, PT_SCX, ucp_Grantha }, + { 1226, PT_SCX, ucp_Grantha }, + { 1234, PT_BOOL, ucp_Grapheme_Base }, + { 1247, PT_BOOL, ucp_Grapheme_Extend }, + { 1262, PT_BOOL, ucp_Grapheme_Link }, + { 1275, PT_BOOL, ucp_Grapheme_Base }, + { 1282, PT_SCX, ucp_Greek }, + { 1288, PT_SCX, ucp_Greek }, + { 1293, PT_BOOL, ucp_Grapheme_Extend }, + { 1299, PT_BOOL, ucp_Grapheme_Link }, + { 1306, 
PT_SCX, ucp_Gujarati }, + { 1315, PT_SCX, ucp_Gujarati }, + { 1320, PT_SCX, ucp_Gurung_Khema }, + { 1325, PT_SCX, ucp_Gunjala_Gondi }, + { 1338, PT_SCX, ucp_Gurmukhi }, + { 1347, PT_SCX, ucp_Gurmukhi }, + { 1352, PT_SCX, ucp_Gurung_Khema }, + { 1364, PT_SCX, ucp_Han }, + { 1368, PT_SCX, ucp_Hangul }, + { 1373, PT_SCX, ucp_Hangul }, + { 1380, PT_SCX, ucp_Han }, + { 1385, PT_SCX, ucp_Hanifi_Rohingya }, + { 1400, PT_SCX, ucp_Hanunoo }, + { 1405, PT_SCX, ucp_Hanunoo }, + { 1413, PT_SC, ucp_Hatran }, + { 1418, PT_SC, ucp_Hatran }, + { 1425, PT_SCX, ucp_Hebrew }, + { 1430, PT_SCX, ucp_Hebrew }, + { 1437, PT_BOOL, ucp_Hex_Digit }, + { 1441, PT_BOOL, ucp_Hex_Digit }, + { 1450, PT_SCX, ucp_Hiragana }, + { 1455, PT_SCX, ucp_Hiragana }, + { 1464, PT_SC, ucp_Anatolian_Hieroglyphs }, + { 1469, PT_SC, ucp_Pahawh_Hmong }, + { 1474, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, + { 1479, PT_SCX, ucp_Old_Hungarian }, + { 1484, PT_BOOL, ucp_ID_Continue }, + { 1488, PT_BOOL, ucp_ID_Compat_Math_Continue }, + { 1509, PT_BOOL, ucp_ID_Compat_Math_Start }, + { 1527, PT_BOOL, ucp_ID_Continue }, + { 1538, PT_BOOL, ucp_Ideographic }, + { 1543, PT_BOOL, ucp_Ideographic }, + { 1555, PT_BOOL, ucp_ID_Start }, + { 1559, PT_BOOL, ucp_IDS_Binary_Operator }, + { 1564, PT_BOOL, ucp_IDS_Binary_Operator }, + { 1582, PT_BOOL, ucp_IDS_Trinary_Operator }, + { 1587, PT_BOOL, ucp_ID_Start }, + { 1595, PT_BOOL, ucp_IDS_Trinary_Operator }, + { 1614, PT_BOOL, ucp_IDS_Unary_Operator }, + { 1619, PT_BOOL, ucp_IDS_Unary_Operator }, + { 1636, PT_SC, ucp_Imperial_Aramaic }, + { 1652, PT_BOOL, ucp_InCB }, + { 1657, PT_SC, ucp_Inherited }, + { 1667, PT_SC, ucp_Inscriptional_Pahlavi }, + { 1688, PT_SC, ucp_Inscriptional_Parthian }, + { 1710, PT_SC, ucp_Old_Italic }, + { 1715, PT_SCX, ucp_Javanese }, + { 1720, PT_SCX, ucp_Javanese }, + { 1729, PT_BOOL, ucp_Join_Control }, + { 1735, PT_BOOL, ucp_Join_Control }, + { 1747, PT_SCX, ucp_Kaithi }, + { 1754, PT_SCX, ucp_Kayah_Li }, + { 1759, PT_SCX, ucp_Katakana }, + { 1764, PT_SCX, 
ucp_Kannada }, + { 1772, PT_SCX, ucp_Katakana }, + { 1781, PT_SC, ucp_Kawi }, + { 1786, PT_SCX, ucp_Kayah_Li }, + { 1794, PT_SC, ucp_Kharoshthi }, + { 1799, PT_SC, ucp_Kharoshthi }, + { 1810, PT_SC, ucp_Khitan_Small_Script }, + { 1828, PT_SC, ucp_Khmer }, + { 1834, PT_SC, ucp_Khmer }, + { 1839, PT_SCX, ucp_Khojki }, + { 1844, PT_SCX, ucp_Khojki }, + { 1851, PT_SCX, ucp_Khudawadi }, + { 1861, PT_SC, ucp_Kirat_Rai }, + { 1870, PT_SC, ucp_Khitan_Small_Script }, + { 1875, PT_SCX, ucp_Kannada }, + { 1880, PT_SC, ucp_Kirat_Rai }, + { 1885, PT_SCX, ucp_Kaithi }, + { 1890, PT_GC, ucp_L }, + { 1892, PT_LAMP, 0 }, + { 1895, PT_SC, ucp_Tai_Tham }, + { 1900, PT_SC, ucp_Lao }, + { 1904, PT_SC, ucp_Lao }, + { 1909, PT_SCX, ucp_Latin }, + { 1915, PT_SCX, ucp_Latin }, + { 1920, PT_LAMP, 0 }, + { 1923, PT_SC, ucp_Lepcha }, + { 1928, PT_SC, ucp_Lepcha }, + { 1935, PT_SCX, ucp_Limbu }, + { 1940, PT_SCX, ucp_Limbu }, + { 1946, PT_SCX, ucp_Linear_A }, + { 1951, PT_SCX, ucp_Linear_B }, + { 1956, PT_SCX, ucp_Linear_A }, + { 1964, PT_SCX, ucp_Linear_B }, + { 1972, PT_SCX, ucp_Lisu }, + { 1977, PT_PC, ucp_Ll }, + { 1980, PT_PC, ucp_Lm }, + { 1983, PT_PC, ucp_Lo }, + { 1986, PT_BOOL, ucp_Logical_Order_Exception }, + { 1990, PT_BOOL, ucp_Logical_Order_Exception }, + { 2012, PT_BOOL, ucp_Lowercase }, + { 2018, PT_BOOL, ucp_Lowercase }, + { 2028, PT_PC, ucp_Lt }, + { 2031, PT_PC, ucp_Lu }, + { 2034, PT_SCX, ucp_Lycian }, + { 2039, PT_SCX, ucp_Lycian }, + { 2046, PT_SCX, ucp_Lydian }, + { 2051, PT_SCX, ucp_Lydian }, + { 2058, PT_GC, ucp_M }, + { 2060, PT_SCX, ucp_Mahajani }, + { 2069, PT_SCX, ucp_Mahajani }, + { 2074, PT_SC, ucp_Makasar }, + { 2079, PT_SC, ucp_Makasar }, + { 2087, PT_SCX, ucp_Malayalam }, + { 2097, PT_SCX, ucp_Mandaic }, + { 2102, PT_SCX, ucp_Mandaic }, + { 2110, PT_SCX, ucp_Manichaean }, + { 2115, PT_SCX, ucp_Manichaean }, + { 2126, PT_SC, ucp_Marchen }, + { 2131, PT_SC, ucp_Marchen }, + { 2139, PT_SCX, ucp_Masaram_Gondi }, + { 2152, PT_BOOL, ucp_Math }, + { 2157, PT_PC, 
ucp_Mc }, + { 2160, PT_BOOL, ucp_Modifier_Combining_Mark }, + { 2164, PT_PC, ucp_Me }, + { 2167, PT_SC, ucp_Medefaidrin }, + { 2179, PT_SC, ucp_Medefaidrin }, + { 2184, PT_SC, ucp_Meetei_Mayek }, + { 2196, PT_SC, ucp_Mende_Kikakui }, + { 2201, PT_SC, ucp_Mende_Kikakui }, + { 2214, PT_SC, ucp_Meroitic_Cursive }, + { 2219, PT_SCX, ucp_Meroitic_Hieroglyphs }, + { 2224, PT_SC, ucp_Meroitic_Cursive }, + { 2240, PT_SCX, ucp_Meroitic_Hieroglyphs }, + { 2260, PT_SC, ucp_Miao }, + { 2265, PT_SCX, ucp_Malayalam }, + { 2270, PT_PC, ucp_Mn }, + { 2273, PT_SCX, ucp_Modi }, + { 2278, PT_BOOL, ucp_Modifier_Combining_Mark }, + { 2300, PT_SCX, ucp_Mongolian }, + { 2305, PT_SCX, ucp_Mongolian }, + { 2315, PT_SC, ucp_Mro }, + { 2319, PT_SC, ucp_Mro }, + { 2324, PT_SC, ucp_Meetei_Mayek }, + { 2329, PT_SCX, ucp_Multani }, + { 2334, PT_SCX, ucp_Multani }, + { 2342, PT_SCX, ucp_Myanmar }, + { 2350, PT_SCX, ucp_Myanmar }, + { 2355, PT_GC, ucp_N }, + { 2357, PT_SC, ucp_Nabataean }, + { 2367, PT_SC, ucp_Nag_Mundari }, + { 2372, PT_SC, ucp_Nag_Mundari }, + { 2383, PT_SCX, ucp_Nandinagari }, + { 2388, PT_SCX, ucp_Nandinagari }, + { 2400, PT_SC, ucp_Old_North_Arabian }, + { 2405, PT_SC, ucp_Nabataean }, + { 2410, PT_BOOL, ucp_Noncharacter_Code_Point }, + { 2416, PT_PC, ucp_Nd }, + { 2419, PT_SC, ucp_Newa }, + { 2424, PT_SC, ucp_New_Tai_Lue }, + { 2434, PT_SCX, ucp_Nko }, + { 2438, PT_SCX, ucp_Nko }, + { 2443, PT_PC, ucp_Nl }, + { 2446, PT_PC, ucp_No }, + { 2449, PT_BOOL, ucp_Noncharacter_Code_Point }, + { 2471, PT_SC, ucp_Nushu }, + { 2476, PT_SC, ucp_Nushu }, + { 2482, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, + { 2503, PT_SC, ucp_Ogham }, + { 2508, PT_SC, ucp_Ogham }, + { 2514, PT_SC, ucp_Ol_Chiki }, + { 2522, PT_SC, ucp_Ol_Chiki }, + { 2527, PT_SCX, ucp_Old_Hungarian }, + { 2540, PT_SC, ucp_Old_Italic }, + { 2550, PT_SC, ucp_Old_North_Arabian }, + { 2566, PT_SCX, ucp_Old_Permic }, + { 2576, PT_SC, ucp_Old_Persian }, + { 2587, PT_SC, ucp_Old_Sogdian }, + { 2598, PT_SC, ucp_Old_South_Arabian }, + 
{ 2614, PT_SCX, ucp_Old_Turkic }, + { 2624, PT_SCX, ucp_Old_Uyghur }, + { 2634, PT_SCX, ucp_Ol_Onal }, + { 2641, PT_SCX, ucp_Ol_Onal }, + { 2646, PT_SCX, ucp_Oriya }, + { 2652, PT_SCX, ucp_Old_Turkic }, + { 2657, PT_SCX, ucp_Oriya }, + { 2662, PT_SCX, ucp_Osage }, + { 2668, PT_SCX, ucp_Osage }, + { 2673, PT_SC, ucp_Osmanya }, + { 2678, PT_SC, ucp_Osmanya }, + { 2686, PT_SCX, ucp_Old_Uyghur }, + { 2691, PT_GC, ucp_P }, + { 2693, PT_SC, ucp_Pahawh_Hmong }, + { 2705, PT_SC, ucp_Palmyrene }, + { 2710, PT_SC, ucp_Palmyrene }, + { 2720, PT_BOOL, ucp_Pattern_Syntax }, + { 2727, PT_BOOL, ucp_Pattern_Syntax }, + { 2741, PT_BOOL, ucp_Pattern_White_Space }, + { 2759, PT_BOOL, ucp_Pattern_White_Space }, + { 2765, PT_SC, ucp_Pau_Cin_Hau }, + { 2770, PT_SC, ucp_Pau_Cin_Hau }, + { 2780, PT_PC, ucp_Pc }, + { 2783, PT_BOOL, ucp_Prepended_Concatenation_Mark }, + { 2787, PT_PC, ucp_Pd }, + { 2790, PT_PC, ucp_Pe }, + { 2793, PT_SCX, ucp_Old_Permic }, + { 2798, PT_PC, ucp_Pf }, + { 2801, PT_SCX, ucp_Phags_Pa }, + { 2806, PT_SCX, ucp_Phags_Pa }, + { 2814, PT_SC, ucp_Inscriptional_Pahlavi }, + { 2819, PT_SCX, ucp_Psalter_Pahlavi }, + { 2824, PT_SC, ucp_Phoenician }, + { 2829, PT_SC, ucp_Phoenician }, + { 2840, PT_PC, ucp_Pi }, + { 2843, PT_SC, ucp_Miao }, + { 2848, PT_PC, ucp_Po }, + { 2851, PT_BOOL, ucp_Prepended_Concatenation_Mark }, + { 2878, PT_SC, ucp_Inscriptional_Parthian }, + { 2883, PT_PC, ucp_Ps }, + { 2886, PT_SCX, ucp_Psalter_Pahlavi }, + { 2901, PT_SCX, ucp_Coptic }, + { 2906, PT_SC, ucp_Inherited }, + { 2911, PT_BOOL, ucp_Quotation_Mark }, + { 2917, PT_BOOL, ucp_Quotation_Mark }, + { 2931, PT_BOOL, ucp_Radical }, + { 2939, PT_BOOL, ucp_Regional_Indicator }, + { 2957, PT_SC, ucp_Rejang }, + { 2964, PT_BOOL, ucp_Regional_Indicator }, + { 2967, PT_SC, ucp_Rejang }, + { 2972, PT_SCX, ucp_Hanifi_Rohingya }, + { 2977, PT_SCX, ucp_Runic }, + { 2983, PT_SCX, ucp_Runic }, + { 2988, PT_GC, ucp_S }, + { 2990, PT_SCX, ucp_Samaritan }, + { 3000, PT_SCX, ucp_Samaritan }, + { 3005, PT_SC, 
ucp_Old_South_Arabian }, + { 3010, PT_SC, ucp_Saurashtra }, + { 3015, PT_SC, ucp_Saurashtra }, + { 3026, PT_PC, ucp_Sc }, + { 3029, PT_BOOL, ucp_Soft_Dotted }, + { 3032, PT_BOOL, ucp_Sentence_Terminal }, + { 3049, PT_SC, ucp_SignWriting }, + { 3054, PT_SCX, ucp_Sharada }, + { 3062, PT_SCX, ucp_Shavian }, + { 3070, PT_SCX, ucp_Shavian }, + { 3075, PT_SCX, ucp_Sharada }, + { 3080, PT_SC, ucp_Siddham }, + { 3085, PT_SC, ucp_Siddham }, + { 3093, PT_SC, ucp_SignWriting }, + { 3105, PT_SCX, ucp_Khudawadi }, + { 3110, PT_SCX, ucp_Sinhala }, + { 3115, PT_SCX, ucp_Sinhala }, + { 3123, PT_PC, ucp_Sk }, + { 3126, PT_PC, ucp_Sm }, + { 3129, PT_PC, ucp_So }, + { 3132, PT_BOOL, ucp_Soft_Dotted }, + { 3143, PT_SCX, ucp_Sogdian }, + { 3148, PT_SCX, ucp_Sogdian }, + { 3156, PT_SC, ucp_Old_Sogdian }, + { 3161, PT_SC, ucp_Sora_Sompeng }, + { 3166, PT_SC, ucp_Sora_Sompeng }, + { 3178, PT_SC, ucp_Soyombo }, + { 3183, PT_SC, ucp_Soyombo }, + { 3191, PT_BOOL, ucp_White_Space }, + { 3197, PT_BOOL, ucp_Sentence_Terminal }, + { 3203, PT_SC, ucp_Sundanese }, + { 3208, PT_SC, ucp_Sundanese }, + { 3218, PT_SCX, ucp_Sunuwar }, + { 3223, PT_SCX, ucp_Sunuwar }, + { 3231, PT_SCX, ucp_Syloti_Nagri }, + { 3236, PT_SCX, ucp_Syloti_Nagri }, + { 3248, PT_SCX, ucp_Syriac }, + { 3253, PT_SCX, ucp_Syriac }, + { 3260, PT_SCX, ucp_Tagalog }, + { 3268, PT_SCX, ucp_Tagbanwa }, + { 3273, PT_SCX, ucp_Tagbanwa }, + { 3282, PT_SCX, ucp_Tai_Le }, + { 3288, PT_SC, ucp_Tai_Tham }, + { 3296, PT_SC, ucp_Tai_Viet }, + { 3304, PT_SCX, ucp_Takri }, + { 3309, PT_SCX, ucp_Takri }, + { 3315, PT_SCX, ucp_Tai_Le }, + { 3320, PT_SC, ucp_New_Tai_Lue }, + { 3325, PT_SCX, ucp_Tamil }, + { 3331, PT_SCX, ucp_Tamil }, + { 3336, PT_SCX, ucp_Tangut }, + { 3341, PT_SC, ucp_Tangsa }, + { 3348, PT_SCX, ucp_Tangut }, + { 3355, PT_SC, ucp_Tai_Viet }, + { 3360, PT_SCX, ucp_Telugu }, + { 3365, PT_SCX, ucp_Telugu }, + { 3372, PT_BOOL, ucp_Terminal_Punctuation }, + { 3377, PT_BOOL, ucp_Terminal_Punctuation }, + { 3397, PT_SCX, ucp_Tifinagh }, 
+ { 3402, PT_SCX, ucp_Tagalog }, + { 3407, PT_SCX, ucp_Thaana }, + { 3412, PT_SCX, ucp_Thaana }, + { 3419, PT_SCX, ucp_Thai }, + { 3424, PT_SCX, ucp_Tibetan }, + { 3432, PT_SCX, ucp_Tibetan }, + { 3437, PT_SCX, ucp_Tifinagh }, + { 3446, PT_SCX, ucp_Tirhuta }, + { 3451, PT_SCX, ucp_Tirhuta }, + { 3459, PT_SC, ucp_Tangsa }, + { 3464, PT_SCX, ucp_Todhri }, + { 3471, PT_SCX, ucp_Todhri }, + { 3476, PT_SCX, ucp_Toto }, + { 3481, PT_SCX, ucp_Tulu_Tigalari }, + { 3494, PT_SCX, ucp_Tulu_Tigalari }, + { 3499, PT_SC, ucp_Ugaritic }, + { 3504, PT_SC, ucp_Ugaritic }, + { 3513, PT_BOOL, ucp_Unified_Ideograph }, + { 3519, PT_BOOL, ucp_Unified_Ideograph }, + { 3536, PT_SC, ucp_Unknown }, + { 3544, PT_BOOL, ucp_Uppercase }, + { 3550, PT_BOOL, ucp_Uppercase }, + { 3560, PT_SC, ucp_Vai }, + { 3564, PT_SC, ucp_Vai }, + { 3569, PT_BOOL, ucp_Variation_Selector }, + { 3587, PT_SC, ucp_Vithkuqi }, + { 3592, PT_SC, ucp_Vithkuqi }, + { 3601, PT_BOOL, ucp_Variation_Selector }, + { 3604, PT_SC, ucp_Wancho }, + { 3611, PT_SC, ucp_Warang_Citi }, + { 3616, PT_SC, ucp_Warang_Citi }, + { 3627, PT_SC, ucp_Wancho }, + { 3632, PT_BOOL, ucp_White_Space }, + { 3643, PT_BOOL, ucp_White_Space }, + { 3650, PT_ALNUM, 0 }, + { 3654, PT_BOOL, ucp_XID_Continue }, + { 3659, PT_BOOL, ucp_XID_Continue }, + { 3671, PT_BOOL, ucp_XID_Start }, + { 3676, PT_BOOL, ucp_XID_Start }, + { 3685, PT_SC, ucp_Old_Persian }, + { 3690, PT_PXSPACE, 0 }, + { 3694, PT_SPACE, 0 }, + { 3698, PT_SC, ucp_Cuneiform }, + { 3703, PT_UCNC, 0 }, + { 3707, PT_WORD, 0 }, + { 3711, PT_SCX, ucp_Yezidi }, + { 3716, PT_SCX, ucp_Yezidi }, + { 3723, PT_SCX, ucp_Yi }, + { 3726, PT_SCX, ucp_Yi }, + { 3731, PT_GC, ucp_Z }, + { 3733, PT_SC, ucp_Zanabazar_Square }, + { 3749, PT_SC, ucp_Zanabazar_Square }, + { 3754, PT_SC, ucp_Inherited }, + { 3759, PT_PC, ucp_Zl }, + { 3762, PT_PC, ucp_Zp }, + { 3765, PT_PC, ucp_Zs }, + { 3768, PT_SC, ucp_Common }, + { 3773, PT_SC, ucp_Unknown } }; const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / 
sizeof(ucp_type_table); diff --git a/src/pcre2_util.h b/src/pcre2_util.h new file mode 100644 index 0000000..ea86355 --- /dev/null +++ b/src/pcre2_util.h @@ -0,0 +1,132 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD +#define PCRE2_UTIL_H_IDEMPOTENT_GUARD + +/* Assertion macros */ + +#ifdef PCRE2_DEBUG + +#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) +#include <assert.h> +#endif + +/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions +that the code below doesn't support. It is a NOP for non debug builds +but in debug builds will print information about the location of the +code where it triggered and crash. + +It is meant to work like assert(), and therefore the expression used +should indicate what the expected state is, and shouldn't have any +side-effects. */ + +#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) +#define PCRE2_ASSERT(x) assert(x) +#else +#define PCRE2_ASSERT(x) do \ +{ \ + if (!(x)) \ + { \ + fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \ + abort(); \ + } \ +} while(0) +#endif + +/* PCRE2_UNREACHABLE() can be used to mark locations on the code that +shouldn't be reached. In non debug builds is defined as a hint for +the compiler to eliminate any code after it, so it is useful also for +performance reasons, but should be used with care because if it is +ever reached will trigger Undefined Behaviour and if you are lucky a +crash. In debug builds it will report the location where it was triggered +and crash.
One important point to consider when using this macro, is +that it is only implemented for a few compilers, and therefore can't +be relied on to always be active either, so if it is followed by some +code it is important to make sure that the whole thing is safe to +use even if the macro is not there (ex: make sure there is a `break` +after it if used at the end of a `case`) and to test your code also +with a configuration where the macro will be a NOP. */ + +#if defined(HAVE_ASSERT_H) && !defined(NDEBUG) +#define PCRE2_UNREACHABLE() \ +assert(((void)"Execution reached unexpected point", 0)) +#else +#define PCRE2_UNREACHABLE() do \ +{ \ +fprintf(stderr, "Execution reached unexpected point at " __FILE__ \ + ":%d\n", __LINE__); \ +abort(); \ +} while(0) +#endif + +/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous +macro. It is meant to be used in places where the code is handling +an error situation in code that shouldn't be reached, but that has +some sort of fallback code to normally handle the error. When in +doubt you should use this instead of the previous macro. Like in +the previous case, it is a good idea to document as much as possible +the reason and the actions that should be taken if it ever triggers. 
*/ + +#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE() + +#endif /* PCRE2_DEBUG */ + +#ifndef PCRE2_DEBUG_UNREACHABLE +#define PCRE2_DEBUG_UNREACHABLE() do {} while(0) +#endif + +#ifndef PCRE2_UNREACHABLE +#ifdef HAVE_BUILTIN_UNREACHABLE +#define PCRE2_UNREACHABLE() __builtin_unreachable() +#elif defined(HAVE_BUILTIN_ASSUME) +#define PCRE2_UNREACHABLE() __assume(0) +#else +#define PCRE2_UNREACHABLE() do {} while(0) +#endif +#endif /* !PCRE2_UNREACHABLE */ + +#ifndef PCRE2_ASSERT +#define PCRE2_ASSERT(x) do {} while(0) +#endif + +#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */ + +/* End of pcre2_util.h */ diff --git a/src/pcre2_xclass.c b/src/pcre2_xclass.c index 5df25d2..25de7cb 100644 --- a/src/pcre2_xclass.c +++ b/src/pcre2_xclass.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2023 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -38,9 +38,9 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -/* This module contains an internal function that is used to match an extended -class. It is used by pcre2_auto_possessify() and by both pcre2_match() and -pcre2_def_match(). */ +/* This module contains two internal functions that are used to match +OP_XCLASS and OP_ECLASS. It is used by pcre2_auto_possessify() and by both +pcre2_match() and pcre2_dfa_match(). */ #ifdef HAVE_CONFIG_H @@ -66,114 +66,75 @@ Returns: TRUE if character matches, else FALSE */ BOOL -PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf) +PRIV(xclass)(uint32_t c, PCRE2_SPTR data, const uint8_t *char_lists_end, BOOL utf) { +/* Update PRIV(update_classbits) when this function is changed. 
*/ PCRE2_UCHAR t; -BOOL negated = (*data & XCL_NOT) != 0; +BOOL not_negated = (*data & XCL_NOT) == 0; +uint32_t type, max_index, min_index, value; +const uint8_t *next_char; #if PCRE2_CODE_UNIT_WIDTH == 8 /* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */ utf = TRUE; #endif -/* Code points < 256 are matched against a bitmap, if one is present. If not, -we still carry on, because there may be ranges that start below 256 in the -additional data. */ +/* Code points < 256 are matched against a bitmap, if one is present. */ -if (c < 256) +if ((*data++ & XCL_MAP) != 0) { - if ((*data & XCL_HASPROP) == 0) - { - if ((*data & XCL_MAP) == 0) return negated; - return (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0; - } - if ((*data & XCL_MAP) != 0 && - (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0) - return !negated; /* char found */ + if (c < 256) + return (((const uint8_t *)data)[c/8] & (1u << (c&7))) != 0; + /* Skip bitmap. */ + data += 32 / sizeof(PCRE2_UCHAR); } -/* First skip the bit map if present. Then match against the list of Unicode -properties or large chars or ranges that end with a large char. We won't ever +/* Match against the list of Unicode properties. We won't ever encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. 
*/ - -if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR); - -while ((t = *data++) != XCL_END) - { - uint32_t x, y; - if (t == XCL_SINGLE) - { -#ifdef SUPPORT_UNICODE - if (utf) - { - GETCHARINC(x, data); /* macro generates multiple statements */ - } - else -#endif - x = *data++; - if (c == x) return !negated; - } - else if (t == XCL_RANGE) - { #ifdef SUPPORT_UNICODE - if (utf) - { - GETCHARINC(x, data); /* macro generates multiple statements */ - GETCHARINC(y, data); /* macro generates multiple statements */ - } - else -#endif - { - x = *data++; - y = *data++; - } - if (c >= x && c <= y) return !negated; - } +if (*data == XCL_PROP || *data == XCL_NOTPROP) + { + /* The UCD record is the same for all properties. */ + const ucd_record *prop = GET_UCD(c); -#ifdef SUPPORT_UNICODE - else /* XCL_PROP & XCL_NOTPROP */ + do { int chartype; - const ucd_record *prop = GET_UCD(c); - BOOL isprop = t == XCL_PROP; + BOOL isprop = (*data++) == XCL_PROP; BOOL ok; switch(*data) { - case PT_ANY: - if (isprop) return !negated; - break; - case PT_LAMP: chartype = prop->chartype; if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == isprop) return !negated; + chartype == ucp_Lt) == isprop) return not_negated; break; case PT_GC: if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop) - return !negated; + return not_negated; break; case PT_PC: - if ((data[1] == prop->chartype) == isprop) return !negated; + if ((data[1] == prop->chartype) == isprop) return not_negated; break; case PT_SC: - if ((data[1] == prop->script) == isprop) return !negated; + if ((data[1] == prop->script) == isprop) return not_negated; break; case PT_SCX: ok = (data[1] == prop->script || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0); - if (ok == isprop) return !negated; + if (ok == isprop) return not_negated; break; case PT_ALNUM: chartype = prop->chartype; if ((PRIV(ucp_gentype)[chartype] == ucp_L || PRIV(ucp_gentype)[chartype] == ucp_N) == isprop) - 
return !negated; + return not_negated; break; /* Perl space used to exclude VT, but from Perl 5.18 it is included, @@ -186,12 +147,12 @@ while ((t = *data++) != XCL_END) { HSPACE_CASES: VSPACE_CASES: - if (isprop) return !negated; + if (isprop) return not_negated; break; default: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop) - return !negated; + return not_negated; break; } break; @@ -201,7 +162,7 @@ while ((t = *data++) != XCL_END) if ((PRIV(ucp_gentype)[chartype] == ucp_L || PRIV(ucp_gentype)[chartype] == ucp_N || chartype == ucp_Mn || chartype == ucp_Pc) == isprop) - return !negated; + return not_negated; break; case PT_UCNC: @@ -209,24 +170,24 @@ while ((t = *data++) != XCL_END) { if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || c == CHAR_GRAVE_ACCENT) == isprop) - return !negated; + return not_negated; } else { if ((c < 0xd800 || c > 0xdfff) == isprop) - return !negated; + return not_negated; } break; case PT_BIDICL: if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop) - return !negated; + return not_negated; break; case PT_BOOL: ok = MAPBIT(PRIV(ucd_boolprop_sets) + UCD_BPROPS_PROP(prop), data[1]) != 0; - if (ok == isprop) return !negated; + if (ok == isprop) return not_negated; break; /* The following three properties can occur only in an XCLASS, as there @@ -248,7 +209,7 @@ while ((t = *data++) != XCL_END) (chartype == ucp_Cf && c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069)) )) == isprop) - return !negated; + return not_negated; break; /* Printable character: same as graphic, with the addition of Zs, i.e. 
@@ -262,7 +223,7 @@ while ((t = *data++) != XCL_END) (chartype == ucp_Cf && c != 0x061c && (c < 0x2066 || c > 0x2069)) )) == isprop) - return !negated; + return not_negated; break; /* Punctuation: all Unicode punctuation, plus ASCII characters that @@ -273,7 +234,7 @@ while ((t = *data++) != XCL_END) chartype = prop->chartype; if ((PRIV(ucp_gentype)[chartype] == ucp_P || (c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop) - return !negated; + return not_negated; break; /* Perl has two sets of hex digits */ @@ -285,24 +246,300 @@ while ((t = *data++) != XCL_END) (c >= 0xff10 && c <= 0xff19) || /* Fullwidth digits */ (c >= 0xff21 && c <= 0xff26) || /* Fullwidth letters */ (c >= 0xff41 && c <= 0xff46)) == isprop) - return !negated; + return not_negated; break; /* This should never occur, but compilers may mutter if there is no default. */ default: + PCRE2_DEBUG_UNREACHABLE(); return FALSE; } data += 2; } + while (*data == XCL_PROP || *data == XCL_NOTPROP); + } #else (void)utf; /* Avoid compiler warning */ #endif /* SUPPORT_UNICODE */ + +/* Match against large chars or ranges that end with a large char. */ +if (*data < XCL_LIST) + { + while ((t = *data++) != XCL_END) + { + uint32_t x, y; + +#ifdef SUPPORT_UNICODE + if (utf) + { + GETCHARINC(x, data); /* macro generates multiple statements */ + } + else +#endif + x = *data++; + + if (t == XCL_SINGLE) + { + /* Since character ranges follow the properties, and they are + sorted, early return is possible for all characters <= x. */ + if (c <= x) return (c == x) ? not_negated : !not_negated; + continue; + } + + PCRE2_ASSERT(t == XCL_RANGE); +#ifdef SUPPORT_UNICODE + if (utf) + { + GETCHARINC(y, data); /* macro generates multiple statements */ + } + else +#endif + y = *data++; + + /* Since character ranges follow the properties, and they are + sorted, early return is possible for all characters <= y. */ + if (c <= y) return (c >= x) ? 
not_negated : !not_negated; + } + + return !not_negated; /* char did not match */ + } + +#if PCRE2_CODE_UNIT_WIDTH == 8 +type = (uint32_t)(data[0] << 8) | data[1]; +data += 2; +#else +type = data[0]; +data++; +#endif /* CODE_UNIT_WIDTH */ + +/* Align characters. */ +next_char = char_lists_end - (GET(data, 0) << 1); +type &= XCL_TYPE_MASK; + +/* Alignment check. */ +PCRE2_ASSERT(((uintptr_t)next_char & 0x1) == 0); + +if (c >= XCL_CHAR_LIST_HIGH_16_START) + { + max_index = type & XCL_ITEM_COUNT_MASK; + if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint16_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 2; + } + + next_char += max_index << 1; + type >>= XCL_TYPE_BIT_LEN; } -return negated; /* char did not match */ +if (c < XCL_CHAR_LIST_LOW_32_START) + { + max_index = type & XCL_ITEM_COUNT_MASK; + + c = (uint16_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END); + + if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint16_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 2; + } + + if (max_index == 0 || c < *(const uint16_t*)next_char) + return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated; + + min_index = 0; + value = ((const uint16_t*)next_char)[--max_index]; + if (c >= value) + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + + max_index--; + + /* Binary search of a range. */ + while (TRUE) + { + uint32_t mid_index = (min_index + max_index) >> 1; + value = ((const uint16_t*)next_char)[mid_index]; + + if (c < value) + max_index = mid_index - 1; + else if (((const uint16_t*)next_char)[mid_index + 1] <= c) + min_index = mid_index + 1; + else + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + } + } + +/* Skip the 16 bit ranges. 
*/ +max_index = type & XCL_ITEM_COUNT_MASK; +if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint16_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 2; + } + +next_char += (max_index << 1); +type >>= XCL_TYPE_BIT_LEN; + +/* Alignment check. */ +PCRE2_ASSERT(((uintptr_t)next_char & 0x3) == 0); + +max_index = type & XCL_ITEM_COUNT_MASK; + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (c >= XCL_CHAR_LIST_HIGH_32_START) + { + if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint32_t*)next_char; + PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK); + next_char += 4; + } + + next_char += max_index << 2; + type >>= XCL_TYPE_BIT_LEN; + max_index = type & XCL_ITEM_COUNT_MASK; + } +#endif + +c = (uint32_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END); + +if (max_index == XCL_ITEM_COUNT_MASK) + { + max_index = *(const uint32_t*)next_char; + next_char += 4; + } + +if (max_index == 0 || c < *(const uint32_t*)next_char) + return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated; + +min_index = 0; +value = ((const uint32_t*)next_char)[--max_index]; +if (c >= value) + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + +max_index--; + +/* Binary search of a range. */ +while (TRUE) + { + uint32_t mid_index = (min_index + max_index) >> 1; + value = ((const uint32_t*)next_char)[mid_index]; + + if (c < value) + max_index = mid_index - 1; + else if (((const uint32_t*)next_char)[mid_index + 1] <= c) + min_index = mid_index + 1; + else + return (value == c || (value & XCL_CHAR_END) == 0) == not_negated; + } +} + + + +/************************************************* +* Match character against an ECLASS * +*************************************************/ + +/* This function is called to match a character against an extended class +used for describing characters using boolean operations on sets. 
+ +Arguments: + c the character + data_start points to the start of the ECLASS data + data_end points one-past-the-last of the ECLASS data + utf TRUE if in UTF mode + +Returns: TRUE if character matches, else FALSE +*/ + +BOOL +PRIV(eclass)(uint32_t c, PCRE2_SPTR data_start, PCRE2_SPTR data_end, + const uint8_t *char_lists_end, BOOL utf) +{ +PCRE2_SPTR ptr = data_start; +PCRE2_UCHAR flags; +uint32_t stack = 0; +int stack_depth = 0; + +PCRE2_ASSERT(data_start < data_end); +flags = *ptr++; +PCRE2_ASSERT((flags & ECL_MAP) == 0 || + (data_end - ptr) >= 32 / (int)sizeof(PCRE2_UCHAR)); + +/* Code points < 256 are matched against a bitmap, if one is present. +Otherwise all codepoints are checked later. */ + +if ((flags & ECL_MAP) != 0) + { + if (c < 256) + return (((const uint8_t *)ptr)[c/8] & (1u << (c&7))) != 0; + + /* Skip the bitmap. */ + ptr += 32 / sizeof(PCRE2_UCHAR); + } + +/* Do a little loop, until we reach the end of the ECLASS. */ +while (ptr < data_end) + { + switch (*ptr) + { + case ECL_AND: + ++ptr; + stack = (stack >> 1) & (stack | ~(uint32_t)1u); + PCRE2_ASSERT(stack_depth >= 2); + --stack_depth; + break; + + case ECL_OR: + ++ptr; + stack = (stack >> 1) | (stack & (uint32_t)1u); + PCRE2_ASSERT(stack_depth >= 2); + --stack_depth; + break; + + case ECL_XOR: + ++ptr; + stack = (stack >> 1) ^ (stack & (uint32_t)1u); + PCRE2_ASSERT(stack_depth >= 2); + --stack_depth; + break; + + case ECL_NOT: + ++ptr; + stack ^= (uint32_t)1u; + PCRE2_ASSERT(stack_depth >= 1); + break; + + case ECL_XCLASS: + { + uint32_t matched = PRIV(xclass)(c, ptr + 1 + LINK_SIZE, char_lists_end, utf); + + ptr += GET(ptr, 1); + stack = (stack << 1) | matched; + ++stack_depth; + break; + } + + /* This should never occur, but compilers may mutter if there is no + default. */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + return FALSE; + } + } + +PCRE2_ASSERT(stack_depth == 1); +(void)stack_depth; /* Ignore unused variable, if assertions are disabled. 
*/ + +/* The final bit left on the stack now holds the match result. */ +return (stack & 1u) != 0; } /* End of pcre2_xclass.c */ diff --git a/src/pcre2grep.c b/src/pcre2grep.c index bb96067..d56bfb4 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS. The header can be found in the special z/OS distribution, which is available from www.zaconsultants.net or from www.cbttape.org. - Copyright (c) 1997-2023 University of Cambridge + Copyright (c) 1997-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -87,6 +87,10 @@ POSSIBILITY OF SUCH DAMAGE. #endif #endif +#ifdef SUPPORT_VALGRIND +#include +#endif + #ifdef HAVE_UNISTD_H #include #endif @@ -290,6 +294,7 @@ static BOOL show_total_count = FALSE; static BOOL silent = FALSE; static BOOL utf = FALSE; static BOOL posix_digit = FALSE; +static BOOL posix_pattern_file = FALSE; static uint8_t utf8_buffer[8]; @@ -428,6 +433,7 @@ used to identify them. 
*/ #define N_POSIX_DIGIT (-26) #define N_GROUP_SEPARATOR (-27) #define N_NO_GROUP_SEPARATOR (-28) +#define N_POSIX_PATFILE (-29) static option_item optionlist[] = { { OP_NODATA, N_NULL, NULL, "", "terminate options" }, @@ -449,6 +455,7 @@ static option_item optionlist[] = { { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" }, { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" }, { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" }, + { OP_NODATA, N_POSIX_PATFILE, NULL, "posix-pattern-file", "use POSIX semantics for pattern files" }, { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" }, { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, { OP_STRING, N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" }, @@ -893,11 +900,12 @@ readdirectory(directory_type *dir) for (;;) { struct dirent *dent = readdir(dir); - if (dent == NULL) return NULL; + if (dent == NULL) break; if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) return dent->d_name; } -/* Control never reaches here */ + +return NULL; } static void @@ -1448,7 +1456,34 @@ while ((c = fgetc(f)) != EOF) return yield; } +/************************************************* +* Read one pattern from file * +*************************************************/ + +/* Wrap around read_one_line() to make sure any terminating '\n' is not +included in the pattern and empty patterns are correctly identified. 
+ +Arguments: + buffer the buffer to read into + length maximum number of characters to read and report how many were + f the file + +Returns: TRUE if a pattern was read into buffer +*/ +static BOOL +read_pattern(char *buffer, PCRE2_SIZE *length, FILE *f) +{ +*buffer = '\0'; +*length = read_one_line(buffer, *length, f); +if (*length > 0 && buffer[*length-1] == '\n') *length = *length - 1; +if (posix_pattern_file && *length > 0 && buffer[*length-1] == '\r') + { + *length = *length - 1; + if (*length == 0) return TRUE; + } +return (*length > 0 || *buffer == '\n'); +} /************************************************* * Find end of line * @@ -1506,12 +1541,13 @@ switch(endlinetype) for (;;) { while (p < endptr && *p != '\r') p++; - if (++p >= endptr) + if (p == endptr) { *lenptr = 0; return endptr; } - if (*p == '\n') + p++; + if (p < endptr && *p == '\n') { *lenptr = 2; return p + 1; @@ -1522,42 +1558,25 @@ switch(endlinetype) case PCRE2_NEWLINE_ANYCRLF: while (p < endptr) { - int extra = 0; - int c = *((unsigned char *)p); - - if (utf && c >= 0xc0) + if (*p == '\n') { - int gcii, gcss; - extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ - gcss = 6*extra; - c = (c & utf8_table3[extra]) << gcss; - for (gcii = 1; gcii <= extra; gcii++) - { - gcss -= 6; - c |= (p[gcii] & 0x3f) << gcss; - } + *lenptr = 1; + return p + 1; } - p += 1 + extra; - - switch (c) + if (*p == '\r') { - case '\n': - *lenptr = 1; - return p; - - case '\r': - if (p < endptr && *p == '\n') + if (p + 1 < endptr && p[1] == '\n') { *lenptr = 2; - p++; + return p + 2; } - else *lenptr = 1; - return p; - default: - break; + *lenptr = 1; + return p + 1; } + + p++; } /* End of loop for ANYCRLF case */ *lenptr = 0; /* Must have hit the end */ @@ -1573,6 +1592,11 @@ switch(endlinetype) { int gcii, gcss; extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ + if (endptr - p < 1 + extra) + { + *lenptr = 0; /* Hit the end, halfway through a character */ + return endptr; + } gcss = 
6*extra; c = (c & utf8_table3[extra]) << gcss; for (gcii = 1; gcii <= extra; gcii++) @@ -1589,26 +1613,26 @@ switch(endlinetype) case '\n': /* LF */ case '\v': /* VT */ case '\f': /* FF */ - *lenptr = 1; + *lenptr = 1 + extra; return p; case '\r': /* CR */ - if (p < endptr && *p == '\n') + if (extra == 0 && p < endptr && *p == '\n') { *lenptr = 2; p++; } - else *lenptr = 1; + else *lenptr = 1 + extra; return p; #ifndef EBCDIC case 0x85: /* Unicode NEL */ - *lenptr = utf? 2 : 1; + *lenptr = 1 + extra; return p; case 0x2028: /* Unicode LS */ case 0x2029: /* Unicode PS */ - *lenptr = 3; + *lenptr = 1 + extra; return p; #endif /* Not EBCDIC */ @@ -1659,33 +1683,53 @@ switch(endlinetype) return p; case PCRE2_NEWLINE_CRLF: + p -= 2; for (;;) { - p -= 2; while (p > startptr && p[-1] != '\n') p--; - if (p <= startptr + 1 || p[-2] == '\r') return p; + if (p == startptr) break; + if (p - startptr >= 2 && p[-2] == '\r') break; + p--; + } + return p; + + case PCRE2_NEWLINE_ANYCRLF: + if (p - startptr >= 2 && p[-2] == '\r' && p[-1] == '\n') p -= 2; + else p--; + while (p > startptr) + { + if (p[-1] == '\n' || p[-1] == '\r') break; + p--; } - /* Control can never get here */ + return p; case PCRE2_NEWLINE_ANY: - case PCRE2_NEWLINE_ANYCRLF: - if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; - if (utf) while ((*p & 0xc0) == 0x80) p--; + if (p - startptr >= 2 && p[-2] == '\r' && p[-1] == '\n') p -= 2; + else + { + if (utf) while (p > startptr && (p[-1] & 0xc0) == 0x80) p--; + if (p > startptr) p--; + } while (p > startptr) { - unsigned int c; + int c; char *pp = p - 1; if (utf) { int extra = 0; - while ((*pp & 0xc0) == 0x80) pp--; + while (pp > startptr && (*pp & 0xc0) == 0x80) pp--; c = *((unsigned char *)pp); if (c >= 0xc0) { int gcii, gcss; extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ + if (p - pp < 1 + extra) + { + p = pp; /* Rewind over the broken character */ + continue; + } gcss = 6*extra; c = (c & utf8_table3[extra]) << gcss; for (gcii = 1; gcii 
<= extra; gcii++) @@ -1697,17 +1741,7 @@ switch(endlinetype) } else c = *((unsigned char *)pp); - if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c) - { - case '\n': /* LF */ - case '\r': /* CR */ - return p; - - default: - break; - } - - else switch (c) + switch (c) { case '\n': /* LF */ case '\v': /* VT */ @@ -1727,7 +1761,7 @@ switch(endlinetype) p = pp; /* Back one character */ } /* End of loop for ANY case */ - return startptr; /* Hit start of data */ + return p; } /* End of overall switch */ } @@ -1991,11 +2025,23 @@ switch (*(++string)) *last = string; return DDE_ERROR; + case '&': + /* In a callout, no capture is available. Return the character '0' for + consistency with $0. */ + + if (callout) *value = '0'; + else + { + *value = 0; + rc = DDE_CAPTURE; + } + break; + case '{': brace = TRUE; string++; - if (!isdigit((unsigned char)(*string))) /* Syntax error: a decimal number required. */ - { + if (!isdigit((unsigned char)(*string))) /* Syntax error: */ + { /* a decimal number required. */ if (!callout) fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", (int)(string - begin), "decimal number expected"); @@ -2072,9 +2118,9 @@ switch (*(++string)) { if (!isxdigit(*string)) break; if (*string >= '0' && *string <= '9') - c = c *16 + *string++ - '0'; + c = c *16 + (*string++ - '0'); else - c = c * 16 + (*string++ | 0x20) - 'a' + 10; + c = c * 16 + ((*string++ | 0x20) - 'a') + 10; } *value = c; string--; /* Point to last digit */ @@ -2454,10 +2500,13 @@ while (length > 0) break; /* LCOV_EXCL_START */ - default: /* Even though this should not occur, the string having */ - case DDE_ERROR: /* been checked above, we need to include the free() */ - free(args); /* calls so that source checkers do not complain. */ + default: + /* Even though this should not occur, the string having been checked above, + * we need to include the free() calls so that source checkers do not complain. 
*/ + case DDE_ERROR: + free(args); free(argsvector); + abort(); return 0; /* LCOV_EXCL_STOP */ } @@ -2480,6 +2529,7 @@ while (length > 0) necessary, otherwise assume fork(). */ #ifdef WIN32 +(void)fflush(stdout); result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector); #elif defined __VMS @@ -2503,6 +2553,7 @@ result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector); } #else /* Neither Windows nor VMS */ +(void)fflush(stdout); pid = fork(); if (pid == 0) { @@ -2512,9 +2563,7 @@ if (pid == 0) } else if (pid > 0) { - (void)fflush(stdout); (void)waitpid(pid, &result, 0); - (void)fflush(stdout); } #endif /* End Windows/VMS/other handling */ @@ -2539,6 +2588,7 @@ static PCRE2_SIZE fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length, BOOL input_line_buffered) { +PCRE2_SIZE nread; (void)frtype; /* Avoid warning when not used */ #ifdef SUPPORT_LIBZ @@ -2553,9 +2603,16 @@ if (frtype == FR_LIBBZ2) else #endif -return (input_line_buffered ? +nread = (input_line_buffered ? read_one_line(buffer, length, (FILE *)handle) : fread(buffer, 1, length, (FILE *)handle)); + +#ifdef SUPPORT_VALGRIND +if (nread > 0) VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(buffer, nread); +if (nread < length) VALGRIND_MAKE_MEM_UNDEFINED(buffer + nread, length - nread); +#endif + +return nread; } @@ -2952,12 +3009,15 @@ while (ptr < endptr) FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); lastmatchrestart = pp; } + if (lastmatchrestart != ptr) hyphenpending = TRUE; } - /* If there were non-contiguous lines printed above, insert hyphens. */ + /* If hyphenpending is TRUE when there is no "after" context, it means we + are at the start of a new file, having output something from the previous + file. 
Output a separator if enabled.*/ - if (hyphenpending) + else if (hyphenpending) { if (group_separator != NULL) fprintf(stdout, "%s%s", group_separator, STDOUT_NL); @@ -2984,6 +3044,7 @@ while (ptr < endptr) if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted && group_separator != NULL) fprintf(stdout, "%s%s", group_separator, STDOUT_NL); + hyphenpending = FALSE; while (p < ptr) { @@ -2998,12 +3059,23 @@ while (ptr < endptr) } } + /* If hyphenpending is TRUE here, it was set after outputting some + "after" lines (and there are no "before" lines). */ + + else if (hyphenpending) + { + if (group_separator != NULL) + fprintf(stdout, "%s%s", group_separator, STDOUT_NL); + hyphenpending = FALSE; + } + /* Now print the matching line(s); ensure we set hyphenpending at the end of the file if any context lines are being output. */ if (after_context > 0 || before_context > 0) endhyphenpending = TRUE; + if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_colon); if (number) fprintf(stdout, "%lu:", linenumber); @@ -3598,6 +3670,7 @@ switch(letter) case N_NOJIT: use_jit = FALSE; break; case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break; case N_NO_GROUP_SEPARATOR: group_separator = NULL; break; + case N_POSIX_PATFILE: posix_pattern_file = TRUE; break; case 'a': binary_files = BIN_TEXT; break; case 'c': count_only = TRUE; break; case N_POSIX_DIGIT: posix_digit = TRUE; break; @@ -3808,11 +3881,19 @@ else filename = name; } -while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0) +while (TRUE) { - while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; + patlen = sizeof(buffer); + if (!read_pattern(buffer, &patlen, f)) + break; + + if (!posix_pattern_file) + { + while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; + } + linenumber++; - if (patlen == 0) continue; /* Skip blank lines */ + if (!posix_pattern_file && patlen == 0) continue; /* Skip blank lines */ /* Note: this call to 
add_pattern() puts a pointer to the local variable "buffer" into the pattern chain. However, that pointer is used only when @@ -4244,6 +4325,8 @@ function in the match context. */ #ifdef SUPPORT_PCRE2GREP_CALLOUT pcre2_set_callout(match_context, pcre2grep_callout, NULL); +#else +extra_options |= PCRE2_EXTRA_NEVER_CALLOUT; #endif /* Put limits into the match context. */ @@ -4381,6 +4464,8 @@ if (no_ucp) pcre2_options &= ~PCRE2_UCP; if (case_restrict) extra_options |= PCRE2_EXTRA_CASELESS_RESTRICT; if (posix_digit) extra_options |= (PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_DIGIT); +if ((pcre2_options & PCRE2_LITERAL) != 0) + extra_options &= ~PCRE2_EXTRA_NEVER_CALLOUT; /* Set the extra options in the compile context. */ diff --git a/src/pcre2posix.c b/src/pcre2posix.c index 9fe3199..f9dcbce 100644 --- a/src/pcre2posix.c +++ b/src/pcre2posix.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -94,11 +94,13 @@ changed. This #define is a copy of the one in pcre2_internal.h. */ #include "pcre2.h" #include "pcre2posix.h" +#include "pcre2_util.h" /* Table to translate PCRE2 compile time error codes into POSIX error codes. Only a few PCRE2 errors with a value greater than 23 turn into special POSIX codes: most go to REG_BADPAT. The second table lists, in pairs, those that -don't. */ +don't, even though some of them cannot currently be provoked from within the +POSIX wrapper. 
*/ static const int eint1[] = { 0, /* No error */ @@ -137,7 +139,9 @@ static const int eint2[] = { 37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */ 56, REG_INVARG, /* internal error: unknown newline setting */ 92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */ - 99, REG_EESCAPE /* \K in lookaround */ + 98, REG_EESCAPE, /* missing digit after \0 in NO_BS0 mode */ + 99, REG_EESCAPE, /* \K in lookaround */ + 102, REG_EESCAPE /* \ddd octal > \377 in PYTHON_OCTAL mode */ }; /* Table of texts corresponding to POSIX error codes */ @@ -191,7 +195,7 @@ if (preg != NULL && (int)preg->re_erroffset != -1) /* no need to deal with UB in snprintf */ if (errbuf_size > INT_MAX) errbuf_size = INT_MAX; - /* there are 11 charactes between message and offset, + /* there are 11 characters between message and offset; update message_len() if changed */ ret = snprintf(errbuf, errbuf_size, "%s at offset %d", message, (int)preg->re_erroffset); @@ -207,6 +211,8 @@ else ret = (int)len; } +PCRE2_ASSERT(len > 0 || preg != NULL); + do { if (ret < 0) { diff --git a/src/pcre2test.c b/src/pcre2test.c index 3790345..80ab4f8 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -241,7 +241,7 @@ to hold them as 32-bit code units. */ enum { PR_OK, PR_SKIP, PR_ABEND }; /* The macro PRINTABLE determines whether to print an output character as-is or -as a hex value when showing compiled patterns. is We use it in cases when the +as a hex value when showing compiled patterns. We use it in cases when the locale has not been explicitly changed, so as to get consistent output from systems that differ in their output from isprint() even in the "C" locale. 
*/ @@ -468,6 +468,7 @@ enum { MOD_CTC, /* Applies to a compile context */ MOD_NL, /* Is a newline value */ MOD_NN, /* Is a number or a name; more than one may occur */ MOD_OPT, /* Is an option bit */ + MOD_OPTMZ, /* Is an optimization directive */ MOD_SIZ, /* Is a PCRE2_SIZE value */ MOD_STR }; /* Is a string */ @@ -532,6 +533,7 @@ so many of them that they are split into two fields. */ #define CTL2_NULL_SUBJECT 0x00002000u #define CTL2_NULL_REPLACEMENT 0x00004000u #define CTL2_FRAMESIZE 0x00008000u +#define CTL2_SUBSTITUTE_CASE_CALLOUT 0x00010000u #define CTL2_HEAPFRAMES_SIZE 0x20000000u /* Informational */ #define CTL2_NL_SET 0x40000000u /* Informational */ @@ -561,6 +563,7 @@ different things in the two cases. */ CTL2_SUBSTITUTE_UNKNOWN_UNSET|\ CTL2_SUBSTITUTE_UNSET_EMPTY|\ CTL2_ALLVECTOR|\ + CTL2_SUBSTITUTE_CASE_CALLOUT|\ CTL2_HEAPFRAMES_SIZE) /* Structures for holding modifier information for patterns and subject strings @@ -573,9 +576,9 @@ typedef struct patctl { /* Structure for pattern modifiers. */ uint32_t control; /* Must be in same position as datctl */ uint32_t control2; /* Must be in same position as datctl */ uint32_t jitstack; /* Must be in same position as datctl */ - uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ - uint32_t substitute_skip; /* Must be in same position as patctl */ - uint32_t substitute_stop; /* Must be in same position as patctl */ + uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ + uint32_t substitute_skip; /* Must be in same position as datctl */ + uint32_t substitute_stop; /* Must be in same position as datctl */ uint32_t jit; uint32_t stackguard_test; uint32_t tables_id; @@ -595,7 +598,7 @@ typedef struct datctl { /* Structure for data line modifiers. 
*/ uint32_t control; /* Must be in same position as patctl */ uint32_t control2; /* Must be in same position as patctl */ uint32_t jitstack; /* Must be in same position as patctl */ - uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ + uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ uint32_t substitute_skip; /* Must be in same position as patctl */ uint32_t substitute_stop; /* Must be in same position as patctl */ uint32_t startend[2]; @@ -651,6 +654,7 @@ static modstruct modlist[] = { { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) }, { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) }, { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) }, + { "alt_extended_class", MOD_PAT, MOD_OPT, PCRE2_ALT_EXTENDED_CLASS, PO(options) }, { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) }, { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, @@ -661,6 +665,8 @@ static modstruct modlist[] = { { "ascii_digit", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_DIGIT, CO(extra_options) }, { "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) }, { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, + { "auto_possess", MOD_CTC, MOD_OPTMZ, PCRE2_AUTO_POSSESS, 0 }, + { "auto_possess_off", MOD_CTC, MOD_OPTMZ, PCRE2_AUTO_POSSESS_OFF, 0 }, { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) }, { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, @@ -688,6 +694,8 @@ static modstruct modlist[] = { { "disable_recurseloop_check", MOD_DAT, MOD_OPT, PCRE2_DISABLE_RECURSELOOP_CHECK, DO(options) }, { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, + { "dotstar_anchor", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR, 0 }, + { 
"dotstar_anchor_off", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR_OFF, 0 }, { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) }, { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) }, @@ -705,14 +713,14 @@ static modstruct modlist[] = { { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) }, { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) }, { "heapframes_size", MOD_PND, MOD_CTL, CTL2_HEAPFRAMES_SIZE, PO(control2) }, - { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) }, + { "hex", MOD_PATP, MOD_CTL, CTL_HEXPAT, PO(control) }, { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, { "jit", MOD_PAT, MOD_IND, 7, PO(jit) }, { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) }, { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) }, { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) }, { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) }, - { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) }, + { "locale", MOD_PATP, MOD_STR, LOCALESIZE, PO(locale) }, { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) }, { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) }, { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) }, @@ -725,11 +733,13 @@ static modstruct modlist[] = { { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) }, { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) }, + { "never_callout", MOD_CTC, MOD_OPT, PCRE2_EXTRA_NEVER_CALLOUT, CO(extra_options) }, { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, + { "no_bs0", MOD_CTC, 
MOD_OPT, PCRE2_EXTRA_NO_BS0, CO(extra_options) }, { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) }, { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, @@ -744,6 +754,8 @@ static modstruct modlist[] = { { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) }, { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, + { "optimization_full", MOD_CTC, MOD_OPTMZ, PCRE2_OPTIMIZATION_FULL, 0 }, + { "optimization_none", MOD_CTC, MOD_OPTMZ, PCRE2_OPTIMIZATION_NONE, 0 }, { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, @@ -756,14 +768,18 @@ static modstruct modlist[] = { { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) }, { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) }, + { "python_octal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_PYTHON_OCTAL, CO(extra_options) }, { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */ { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) }, { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, + { "start_optimize", MOD_CTC, MOD_OPTMZ, PCRE2_START_OPTIMIZE, 0 }, + { "start_optimize_off", MOD_CTC, MOD_OPTMZ, PCRE2_START_OPTIMIZE_OFF, 0 }, { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) }, { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) }, { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) }, + { "substitute_case_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CASE_CALLOUT, PO(control2) }, { "substitute_extended", MOD_PND, 
MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) }, { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) }, { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) }, @@ -774,6 +790,7 @@ static modstruct modlist[] = { { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) }, { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) }, { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, + { "turkish_casing", MOD_CTC, MOD_OPT, PCRE2_EXTRA_TURKISH_CASING, CO(extra_options) }, { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) }, @@ -920,7 +937,8 @@ enum { CONF_BSR, CONF_FIX, CONF_FIZ, CONF_INT, - CONF_NL + CONF_NL, + CONF_JU }; static coptstruct coptlist[] = { @@ -929,6 +947,7 @@ static coptstruct coptlist[] = { { "ebcdic", CONF_FIX, SUPPORT_EBCDIC }, { "ebcdic-nl", CONF_FIZ, EBCDIC_NL }, { "jit", CONF_INT, PCRE2_CONFIG_JIT }, + { "jitusable", CONF_JU, 0 }, { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE }, { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE }, { "pcre2-16", CONF_FIX, SUPPORT_16 }, @@ -944,6 +963,13 @@ static coptstruct coptlist[] = { #undef SUPPORT_32 #undef SUPPORT_EBCDIC +/* Types for the parser, to be used in process_data() */ + +enum force_encoding { + FORCE_NONE, /* No preference, follow utf modifier */ + FORCE_RAW, /* Encode as a code point or error if too wide */ + FORCE_UTF /* Encode as a character or error if too wide */ +}; /* ----------------------- Static variables ------------------------ */ @@ -1480,6 +1506,22 @@ are supported. 
*/ pcre2_set_substitute_callout_32(G(a,32), \ (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_substitute_case_callout_8(G(a,8),G(b,8),c); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_substitute_case_callout_16(G(a,16),G(b,16),c); \ + else \ + pcre2_set_substitute_case_callout_32(G(a,32),G(b,32),c) + +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT_NULL(a) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_substitute_case_callout_8(G(a,8),NULL,NULL); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_substitute_case_callout_16(G(a,16),NULL,NULL); \ + else \ + pcre2_set_substitute_case_callout_32(G(a,32),NULL,NULL) + #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ if (test_mode == PCRE8_MODE) \ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \ @@ -1981,6 +2023,18 @@ the three different cases. */ G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \ (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_substitute_case_callout_,BITONE)(G(a,BITONE),G(b,BITONE),c); \ + else \ + G(pcre2_set_substitute_case_callout_,BITTWO)(G(a,BITTWO),G(b,BITTWO),c) + +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT_NULL(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_substitute_case_callout_,BITONE)(G(a,BITONE),NULL,NULL); \ + else \ + G(pcre2_set_substitute_case_callout_,BITTWO)(G(a,BITTWO),NULL,NULL) + #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ @@ -2189,6 +2243,10 @@ the three different cases. 
*/ #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \ pcre2_set_substitute_callout_8(G(a,8), \ (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT(a,b,c) \ + pcre2_set_substitute_case_callout_8(G(a,8),G(b,8),c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT_NULL(a) \ + pcre2_set_substitute_case_callout_8(G(a,8),NULL,NULL) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \ (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l) @@ -2298,6 +2356,10 @@ the three different cases. */ #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \ pcre2_set_substitute_callout_16(G(a,16), \ (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT(a,b,c) \ + pcre2_set_substitute_case_callout_16(G(a,16),G(b,16),c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT_NULL(a) \ + pcre2_set_substitute_case_callout_16(G(a,16),NULL,NULL) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \ (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l) @@ -2407,6 +2469,10 @@ the three different cases. */ #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \ pcre2_set_substitute_callout_32(G(a,32), \ (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT(a,b,c) \ + pcre2_set_substitute_case_callout_32(G(a,32),G(b,32),c) +#define PCRE2_SET_SUBSTITUTE_CASE_CALLOUT_NULL(a) \ + pcre2_set_substitute_case_callout_32(G(a,32),NULL,NULL) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \ (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l) @@ -3587,7 +3653,7 @@ for (;;) } } -/* Control never gets here */ +PCRE2_UNREACHABLE(); /* Control never reaches here */ } @@ -3641,7 +3707,7 @@ while (top > bot) { int mid = (bot + top)/2; unsigned int mlen = strlen(modlist[mid].name); - int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? 
len : mlen); + int c = strncmp((const char *)p, modlist[mid].name, (len < mlen)? len : mlen); if (c == 0) { if (len == mlen) return mid; @@ -3692,7 +3758,7 @@ if (restrict_for_perl_test) switch(m->which) break; default: - fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n", + fprintf(outfile, "** \"%s\" is not allowed in a Perl-compatible test\n", m->name); return NULL; } @@ -3733,7 +3799,7 @@ switch (m->which) if (field == NULL) { if (c == 0) - fprintf(outfile, "** '%s' is not valid here\n", m->name); + fprintf(outfile, "** \"%s\" is not valid here\n", m->name); else fprintf(outfile, "** /%c is not valid here\n", c); return NULL; @@ -3822,12 +3888,14 @@ for (;;) if (!first) { - fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p); + fprintf(outfile, "** Unrecognized modifier \"%.*s\"\n", (int)(ep-p), p); if (ep - p == 1) fprintf(outfile, "** Single-character modifiers must come first\n"); return FALSE; } + first = FALSE; + for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p)) { for (i = 0; i < C1MODLISTCOUNT; i++) @@ -3835,8 +3903,8 @@ for (;;) if (i >= C1MODLISTCOUNT) { - fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n", - *p, (int)(ep-mp), mp); + fprintf(outfile, "** Unrecognized modifier '%c' in modifier string " + "\"%.*s\"\n", *p, (int)(ep-mp), mp); return FALSE; } @@ -3847,12 +3915,12 @@ for (;;) else { - index = scan_modifiers((uint8_t *)(c1modlist[i].fullname), + index = scan_modifiers((const uint8_t *)(c1modlist[i].fullname), strlen(c1modlist[i].fullname)); if (index < 0) { fprintf(outfile, "** Internal error: single-character equivalent " - "modifier '%s' not found\n", c1modlist[i].fullname); + "modifier \"%s\" not found\n", c1modlist[i].fullname); return FALSE; } c1modlist[i].index = index; /* Cache for next time */ @@ -3880,17 +3948,17 @@ for (;;) when needed. 
*/ m = modlist + index; /* Save typing */ - if (m->type != MOD_CTL && m->type != MOD_OPT && + if (m->type != MOD_CTL && m->type != MOD_OPT && m->type != MOD_OPTMZ && (m->type != MOD_IND || *pp == '=')) { if (*pp++ != '=') { - fprintf(outfile, "** '=' expected after '%s'\n", m->name); + fprintf(outfile, "** '=' expected after \"%s\"\n", m->name); return FALSE; } if (off) { - fprintf(outfile, "** '-' is not valid for '%s'\n", m->name); + fprintf(outfile, "** '-' is not valid for \"%s\"\n", m->name); return FALSE; } } @@ -3921,6 +3989,21 @@ for (;;) else *((uint32_t *)field) |= m->value; break; + case MOD_OPTMZ: +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) + pcre2_set_optimize_8((pcre2_compile_context_8*)field, m->value); +#endif +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + pcre2_set_optimize_16((pcre2_compile_context_16*)field, m->value); +#endif +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) + pcre2_set_optimize_32((pcre2_compile_context_32*)field, m->value); +#endif + break; + case MOD_BSR: if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0) { @@ -4060,7 +4143,7 @@ for (;;) field = (char *)field + sizeof(int32_t); if (ct <= 0) { - fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name); + fprintf(outfile, "** Too many numeric \"%s\" modifiers\n", m->name); return FALSE; } } @@ -4078,13 +4161,13 @@ for (;;) { if (len > MAX_NAME_SIZE) { - fprintf(outfile, "** Group name in '%s' is too long\n", m->name); + fprintf(outfile, "** Group name in \"%s\" is too long\n", m->name); return FALSE; } while (*nn != 0) nn += strlen(nn) + 1; if (nn + len + 2 - (char *)field > LENCPYGET) { - fprintf(outfile, "** Too many characters in named '%s' modifiers\n", + fprintf(outfile, "** Too many characters in named \"%s\" modifiers\n", m->name); return FALSE; } @@ -4099,7 +4182,7 @@ for (;;) case MOD_STR: if (len + 1 > m->value) { - fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n", + fprintf(outfile, "** 
Overlong value for \"%s\" (max %d code units)\n", m->name, m->value - 1); return FALSE; } @@ -4111,12 +4194,11 @@ for (;;) if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0) { - fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name); + fprintf(outfile, "** Comma expected after modifier item \"%s\"\n", m->name); return FALSE; } p = pp; - first = FALSE; if (ctx == CTX_POPPAT && (pctl->options != 0 || @@ -4124,7 +4206,7 @@ for (;;) pctl->locale[0] != 0 || (pctl->control & NOTPOP_CONTROLS) != 0)) { - fprintf(outfile, "** '%s' is not valid here\n", m->name); + fprintf(outfile, "** \"%s\" is not valid here\n", m->name); return FALSE; } } @@ -4132,7 +4214,7 @@ for (;;) return TRUE; INVALID_VALUE: -fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p); +fprintf(outfile, "** Invalid value in \"%.*s\"\n", (int)(ep-p), p); return FALSE; } @@ -4212,7 +4294,7 @@ Returns: nothing static void show_controls(uint32_t controls, uint32_t controls2, const char *before) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", @@ -4253,6 +4335,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "", ((controls & CTL_STARTCHAR) != 0)? " startchar" : "", ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "", + ((controls2 & CTL2_SUBSTITUTE_CASE_CALLOUT) != 0)? " substitute_case_callout" : "", ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "", ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "", ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? 
" substitute_matched" : "", @@ -4285,10 +4368,11 @@ static void show_compile_options(uint32_t options, const char *before, const char *after) { if (options == 0) fprintf(outfile, "%s %s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", + ((options & PCRE2_ALT_EXTENDED_CLASS) != 0)? " alt_extended_class" : "", ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "", ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "", ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", @@ -4340,8 +4424,9 @@ show_compile_extra_options(uint32_t options, const char *before, const char *after) { if (options == 0) fprintf(outfile, "%s %s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, + ((options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) != 0) ? " allow_lookaround_bsk" : "", ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "", ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "", ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "", @@ -4354,10 +4439,41 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "", ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "", ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", + ((options & PCRE2_EXTRA_NEVER_CALLOUT) != 0)? " never_callout" : "", + ((options & PCRE2_EXTRA_NO_BS0) != 0)? " no_bs0" : "", + ((options & PCRE2_EXTRA_PYTHON_OCTAL) != 0)? " python_octal" : "", + ((options & PCRE2_EXTRA_TURKISH_CASING) != 0)? 
" turkish_casing" : "", after); } +/************************************************* +* Show optimization flags * +*************************************************/ + +/* +Arguments: + flags an options word + before text to print before + after text to print after + +Returns: nothing +*/ + +static void +show_optimize_flags(uint32_t flags, const char *before, const char *after) +{ +if (flags == 0) fprintf(outfile, "%s%s", before, after); +else fprintf(outfile, "%s%s%s%s%s%s%s", + before, + ((flags & PCRE2_OPTIM_AUTO_POSSESS) != 0) ? "auto_possess" : "", + ((flags & PCRE2_OPTIM_AUTO_POSSESS) != 0 && (flags >> 1) != 0) ? "," : "", + ((flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0) ? "dotstar_anchor" : "", + ((flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0 && (flags >> 2) != 0) ? "," : "", + ((flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) ? "start_optimize" : "", + after); +} + #ifdef SUPPORT_PCRE2_8 /************************************************* @@ -4397,7 +4513,7 @@ static void show_memory_info(void) { uint32_t name_count, name_entry_size; -PCRE2_SIZE size, cblock_size; +PCRE2_SIZE size, cblock_size, data_size; /* One of the test_mode values will always be true, but to stop a compiler warning we must initialize cblock_size. */ @@ -4417,18 +4533,19 @@ if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32); (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE); (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE); -/* The uint32_t variables are cast before multiplying to stop code analyzers -grumbling about potential overflow. */ +/* The uint32_t variables are cast before multiplying to avoid potential + integer overflow. 
*/ +data_size = (PCRE2_SIZE)name_count * (PCRE2_SIZE)name_entry_size * (PCRE2_SIZE)code_unit_size; -fprintf(outfile, "Memory allocation - compiled block : %" SIZ_FORM "\n", size); -fprintf(outfile, "Memory allocation - code portion : %" SIZ_FORM "\n", size - - (PCRE2_SIZE)name_count * (PCRE2_SIZE)name_entry_size * (PCRE2_SIZE)code_unit_size - - cblock_size); +fprintf(outfile, "Memory allocation - code size : %" SIZ_FORM "\n", size - + cblock_size - data_size); +if (data_size != 0) + fprintf(outfile, "Memory allocation - data size : %" SIZ_FORM "\n", data_size); if (pat_patctl.jit != 0) { (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE); - fprintf(outfile, "Memory allocation - JIT code : %" SIZ_FORM "\n", size); + fprintf(outfile, "Memory allocation - JIT code : %" SIZ_FORM "\n", size); } } @@ -4732,7 +4849,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1])); #endif - nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size); + nametable = (void *)((PCRE2_UCHAR8 *)nametable + nameentrysize * code_unit_size); } } @@ -4773,6 +4890,9 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (extra_options != 0) show_compile_extra_options(extra_options, "Extra options:", "\n"); + if (FLD(compiled_code, optimization_flags) != PCRE2_OPTIMIZATION_ALL) + show_optimize_flags(FLD(compiled_code, optimization_flags), "Optimizations: ", "\n"); + if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 || @@ -4835,24 +4955,24 @@ if ((pat_patctl.control & CTL_INFO) != 0) { int i; int c = 24; - fprintf(outfile, "Starting code units: "); + fprintf(outfile, "Starting code units:"); for (i = 0; i < 256; i++) { if ((start_bits[i/8] & (1u << (i&7))) != 0) { if (c > 75) { - fprintf(outfile, "\n "); + fprintf(outfile, "\n "); c = 2; } if (PRINTOK(i) && i != ' ') { - fprintf(outfile, "%c ", i); + fprintf(outfile, " %c", i); c += 2; } else { - fprintf(outfile, 
"\\x%02x ", i); + fprintf(outfile, " \\x%02x", i); c += 5; } } @@ -4875,7 +4995,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) } } - if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0) + if ((FLD(compiled_code, optimization_flags) & PCRE2_OPTIM_START_OPTIMIZE) != 0) fprintf(outfile, "Subject length lower bound = %d\n", minlength); if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) @@ -4970,7 +5090,7 @@ if (endf == filename) *fptr = fopen((const char *)filename, mode); if (*fptr == NULL) { - fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno)); + fprintf(outfile, "** Failed to open \"%s\": %s\n", filename, strerror(errno)); return PR_ABEND; } @@ -5067,14 +5187,14 @@ switch(cmd) { while (isspace(*argptr)) argptr++; if (*argptr == 0) break; - for (i = 1; i < sizeof(newlines)/sizeof(char *); i++) + for (uint16_t j = 1; j < sizeof(newlines)/sizeof(char *); j++) { - size_t nlen = strlen(newlines[i]); - if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 && + size_t nlen = strlen(newlines[j]); + if (strncmpic(argptr, (const uint8_t *)newlines[j], nlen) == 0 && isspace(argptr[nlen])) { - if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */ - if (first_listed_newline == 0) first_listed_newline = i; + if (j == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */ + if (first_listed_newline == 0) first_listed_newline = j; } } while (*argptr != 0 && !isspace(*argptr)) argptr++; @@ -5552,7 +5672,7 @@ if (pat_patctl.locale[0] != 0) } if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL) { - fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale); + fprintf(outfile, "** Failed to set locale \"%s\"\n", pat_patctl.locale); return PR_SKIP; } if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0) @@ -5560,7 +5680,7 @@ if (pat_patctl.locale[0] != 0) strcpy((char *)locale_name, (char *)pat_patctl.locale); if (locale_tables != NULL) { - 
PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables); + PCRE2_MAKETABLES_FREE(general_context, (const void *)locale_tables); } PCRE2_MAKETABLES(locale_tables, general_context); } @@ -6374,6 +6494,227 @@ return yield; } +/************************************************* +* Substitute case callout function * +*************************************************/ + +/* Function to implement our test-only custom case mappings. +To ease implementation, we only work in the ASCII range (so that we don't need +to read & write UTF sequences). +However, we aim to implement case mappings which fairly well represent the range +of interesting behaviours that exist for Unicode codepoints. */ + +static BOOL +case_transform(int to_case, int num_in, int *num_read, int *num_write, + uint32_t *c1, uint32_t *c2) +{ +/* Let's have one character which aborts the substitution. */ +if (*c1 == '!') return FALSE; + +/* Default behaviour is to read one character, and write back that same one +character (treating all characters as "uncased"). */ +*num_read = *num_write = 1; + +/* Add a normal case pair 'a' (l) <-> 'B' (t,u). Standard ASCII letter +behaviour, but with switched letters for testing. */ +if (*c1 == 'a' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'B'; +else if (*c1 == 'B' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'a'; + +/* Add a titlecased triplet 'd' (l) <-> 'D' (t) <-> 'Z' (u). Example: the +'dz'/'Dz'/'DZ' ligature character ("Latin Small Letter DZ" <-> "Latin Capital +Letter D with Small Letter Z" <-> "Latin Capital Letter DZ"). */ +else if (*c1 == 'd' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = (to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST)? 'D' : 'Z'; +else if (*c1 == 'D' && to_case != PCRE2_SUBSTITUTE_CASE_TITLE_FIRST) + *c1 = (to_case == PCRE2_SUBSTITUTE_CASE_LOWER)? 'd' : 'Z'; +else if (*c1 == 'Z' && to_case != PCRE2_SUBSTITUTE_CASE_UPPER) + *c1 = (to_case == PCRE2_SUBSTITUTE_CASE_LOWER)? 'd' : 'D'; + +/* Expands when uppercased. 
Example: Esszet 'f' <-> 'SS'. */ +else if (*c1 == 'f' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + { + *c1 = 'S'; + *c2 = 'S'; + *num_write = 2; + } +else if (*c1 == 's' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'S'; +else if (*c1 == 'S' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 's'; + +/* Expanding and contracting characters, 'o' <-> 'OO'. You can get this purely +due to UTF-8 encoding length, for example uppercase Omega (3 bytes in UTF-8) +lowercases to 2 bytes in UTF-8. */ +else if (num_in == 2 && *c1 == 'O' && *c2 == 'O' && + to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + { + *c1 = 'o'; + *num_read = 2; + } +else if (*c1 == 'o' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + { + *c1 = 'O'; + *c2 = 'O'; + *num_write = 2; + } +else if (num_in == 2 && *c1 == 'p' && *c2 == 'p' && + to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + { + *c1 = 'P'; + *num_read = 2; + } +else if (*c1 == 'P' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + { + *c1 = 'p'; + *c2 = 'p'; + *num_write = 2; + } + +/* Use 'l' -> 'Mn' or 'MN' as an expanding ligature, like 'fi' -> 'Fi' -> +'FI'. */ +else if (*c1 == 'l' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + { + *c1 = 'M'; + *c2 = (to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST)? 'n' : 'N'; + *num_write = 2; + } +else if (*c1 == 'M' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'm'; +else if (*c1 == 'm' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'M'; +else if (*c1 == 'N' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'n'; +else if (*c1 == 'n' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'N'; + +/* An example of a context-dependent mapping, the Greek Sigma. It lowercases +depending on the following character. Use 'c'/'k' -> 'K'. */ +else if ((*c1 == 'c' || *c1 == 'k') && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'K'; +else if (*c1 == 'K' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = (num_in == 1 || *c2 == ' ')? 'c' : 'k'; + +/* An example of a context-dependent multi mapping, the Dutch IJ. 
When those +letters appear together, they titlecase 'ij' (l) <-> 'IJ' (t) <-> 'IJ' (u). +Namely, English titlecasing of 'ijnssel' would be 'Ijnssel' (just uppercase the +first letter), but the Dutch rule is 'IJnssel'. */ +else if (num_in == 2 && (*c1 == 'i' || *c1 == 'I') && + (*c2 == 'j' || *c2 == 'J') && + to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST) + { + *c1 = 'I'; + *c2 = 'J'; + *num_read = 2; + *num_write = 2; + } +else if (*c1 == 'i' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'I'; +else if (*c1 == 'I' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'i'; +else if (*c1 == 'j' && to_case != PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'J'; +else if (*c1 == 'J' && to_case == PCRE2_SUBSTITUTE_CASE_LOWER) + *c1 = 'j'; + +return TRUE; +} + +/* Called from pcre2_substitute() when the substitute_case_callout +modifier is set. The substitute callout block is not identical for all code unit +widths, so we have to duplicate the function for each supported width. + +Arguments: + input the input character + input_len the length of the input + output the output buffer + output_cap the output buffer capacity + to_case the case conversion type + data_ptr callout data (unused) + +Returns: the number of code units of the output +*/ + +#define substitute_case_callout_function(BITS) \ +static PCRE2_SIZE \ +G(substitute_case_callout_function,BITS)( \ + G(PCRE2_SPTR,BITS) input, PCRE2_SIZE input_len, \ + G(PCRE2_UCHAR,BITS) *output, PCRE2_SIZE output_cap, \ + int to_case, void *data_ptr) \ +{ \ +G(PCRE2_UCHAR,BITS) buf[16]; \ +G(PCRE2_SPTR,BITS) input_copy; \ +PCRE2_SIZE written = 0; \ +\ +(void)data_ptr; /* Not used */ \ +\ +if (input_len > sizeof(buf)/sizeof(*buf)) \ + { \ + G(PCRE2_UCHAR,BITS) *input_buf = malloc( \ + input_len * sizeof(G(PCRE2_UCHAR,BITS))); \ + if (input_buf == NULL) return ~(PCRE2_SIZE)0; \ + memcpy(input_buf, input, input_len * sizeof(G(PCRE2_UCHAR,BITS))); \ + input_copy = input_buf; \ + } \ +else \ + { \ + memcpy(buf, input, input_len * 
sizeof(G(PCRE2_UCHAR,BITS))); \ + input_copy = buf; \ + } \ +\ +for (PCRE2_SIZE i = 0; i < input_len; ) \ + { \ + int num_in = i + 1 < input_len ? 2 : 1; \ + uint32_t c1 = input_copy[i]; \ + uint32_t c2 = i + 1 < input_len ? input_copy[i + 1] : 0; \ + int num_read; \ + int num_write; \ + \ + if (!case_transform(to_case, num_in, &num_read, &num_write, &c1, &c2)) \ + { \ + written = ~(PCRE2_SIZE)0; \ + goto END; \ + } \ + \ + i += num_read; \ + if (to_case == PCRE2_SUBSTITUTE_CASE_TITLE_FIRST) \ + to_case = PCRE2_SUBSTITUTE_CASE_LOWER; \ + \ + if (written + num_write > output_cap) \ + { \ + written += num_write; \ + } \ + else \ + { \ + if (num_write > 0) output[written++] = c1; \ + if (num_write > 1) output[written++] = c2; \ + } \ + } \ +\ +END: \ +if (input_copy != buf) free((G(PCRE2_UCHAR,BITS) *)input_copy); \ +\ +/* Let's be maximally cruel. The case callout is allowed to leave the output +buffer in any state at all if it overflows, so let's use random garbage. */ \ +if (written > output_cap) \ + memset(output, time(NULL) & 1 ? 
0xcd : 0xdc, \ + output_cap * sizeof(G(PCRE2_UCHAR,BITS))); \ +\ +return written; \ +} + +#if defined SUPPORT_PCRE2_8 +substitute_case_callout_function(8) +#endif +#if defined SUPPORT_PCRE2_16 +substitute_case_callout_function(16) +#endif +#if defined SUPPORT_PCRE2_32 +substitute_case_callout_function(32) +#endif + + /************************************************* * Callout function * *************************************************/ @@ -6675,13 +7016,13 @@ for (;;) PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) - fprintf(outfile, "Number not found for group '%s'\n", nptr); + fprintf(outfile, "Number not found for group \"%s\"\n", nptr); length = sizeof(copybuffer)/code_unit_size; PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length); if (rc < 0) { - fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc); + fprintf(outfile, "Copy substring \"%s\" failed (%d): ", nptr, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else @@ -6689,7 +7030,7 @@ for (;;) PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2); if (rc < 0) { - fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc); + fprintf(outfile, "Get substring \"%s\" length failed (%d): ", nptr, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else if (length2 != length) @@ -6756,12 +7097,12 @@ for (;;) PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) - fprintf(outfile, "Number not found for group '%s'\n", nptr); + fprintf(outfile, "Number not found for group \"%s\"\n", nptr); PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length); if (rc < 0) { - fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc); + fprintf(outfile, "Get substring \"%s\" failed (%d): ", nptr, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else 
@@ -6919,7 +7260,11 @@ len = strlen((const char *)buffer); while (len > 0 && isspace(buffer[len-1])) len--; buffer[len] = 0; p = buffer; -while (isspace(*p)) p++; +while (isspace(*p)) + { + p++; + len--; + } /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */ @@ -6976,8 +7321,9 @@ in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier. while ((c = *p++) != 0) { - int32_t i = 0; + int i = 0; size_t replen; + enum force_encoding encoding = FORCE_NONE; /* ] may mark the end of a replicated sequence */ @@ -6999,6 +7345,7 @@ while ((c = *p++) != 0) fprintf(outfile, "** Repeat count too large\n"); return PR_OK; } + i = (int)li; p = (uint8_t *)endptr; if (*p++ != '}') @@ -7007,7 +7354,6 @@ while ((c = *p++) != 0) return PR_OK; } - i = (int32_t)li; if (i-- <= 0) { fprintf(outfile, "** Zero or negative repeat not allowed\n"); @@ -7085,8 +7431,10 @@ while ((c = *p++) != 0) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c -= '0'; - while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') - c = c * 8 + *p++ - '0'; + while (i++ < 2 && isdigit(*p) && *p < '8') + c = c * 8 + (*p++ - '0'); + + encoding = (utf && c > 255)? 
FORCE_UTF : FORCE_RAW; break; case 'o': @@ -7094,64 +7442,91 @@ while ((c = *p++) != 0) { uint8_t *pt = p; c = 0; - for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++) + for (pt++; isdigit(*pt) && *pt < '8'; ++i, pt++) { - if (++i == 12) - fprintf(outfile, "** Too many octal digits in \\o{...} item; " - "using only the first twelve.\n"); - else c = c * 8 + *pt - '0'; + if (c >= 0x20000000u) + { + fprintf(outfile, "** \\o{ escape too large\n"); + return PR_OK; + } + else c = c * 8 + (*pt - '0'); + } + if (i == 0 || *pt != '}') + { + fprintf(outfile, "** Malformed \\o{ escape\n"); + return PR_OK; } - if (*pt == '}') p = pt + 1; - else fprintf(outfile, "** Missing } after \\o{ (assumed)\n"); + else p = pt + 1; } break; case 'x': + c = 0; if (*p == '{') { uint8_t *pt = p; - c = 0; /* We used to have "while (isxdigit(*(++pt)))" here, but it fails when isxdigit() is a macro that refers to its argument more than once. This is banned by the C Standard, but apparently happens in at - least one MacOS environment. */ + least one macOS environment. */ for (pt++; isxdigit(*pt); pt++) { if (++i == 9) + { fprintf(outfile, "** Too many hex digits in \\x{...} item; " "using only the first eight.\n"); - else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10); + while (isxdigit(*pt)) pt++; + break; + } + else c = c * 16 + (tolower(*pt) - (isdigit(*pt)? '0' : 'a' - 10)); } - if (*pt == '}') + if (i == 0 || *pt != '}') { - p = pt + 1; - break; + fprintf(outfile, "** Malformed \\x{ escape\n"); + return PR_OK; } - /* Not correct form for \x{...}; fall through */ + else p = pt + 1; } - - /* \x without {} always defines just one byte in 8-bit mode. This - allows UTF-8 characters to be constructed byte by byte, and also allows - invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode. - Otherwise, pass it down as data. */ - - c = 0; - while (i++ < 2 && isxdigit(*p)) + else { - c = c * 16 + tolower(*p) - ((isdigit(*p))? 
'0' : 'a' - 10); - p++; - } + /* \x without {} always defines just one byte in 8-bit mode. This + allows UTF-8 characters to be constructed byte by byte, and also allows + invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode. + Otherwise, pass it down as data. */ + + while (i++ < 2 && isxdigit(*p)) + { + c = c * 16 + (tolower(*p) - (isdigit(*p)? '0' : 'a' - 10)); + p++; + } #if defined SUPPORT_PCRE2_8 - if (utf && (test_mode == PCRE8_MODE)) - { - *q8++ = c; - continue; - } + if (utf && (test_mode == PCRE8_MODE)) encoding = FORCE_RAW; #endif + } break; + case 'N': + if (memcmp(p, "{U+", 3) == 0 && isxdigit(p[3])) + { + char *endptr; + unsigned long uli; + + p += 3; + errno = 0; + uli = strtoul((const char *)p, &endptr, 16); + if (errno == 0 && *endptr == '}' && uli <= UINT32_MAX) + { + c = (uint32_t)uli; + p = (uint8_t *)endptr + 1; + encoding = FORCE_UTF; + break; + } + } + fprintf(outfile, "** Malformed \\N{U+ escape\n"); + return PR_OK; + case 0: /* \ followed by EOF allows for an empty line */ p--; continue; @@ -7177,24 +7552,13 @@ while ((c = *p++) != 0) } /* We now have a character value in c that may be greater than 255. - In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater - than 127 in UTF mode must have come from \x{...} or octal constructs - because values from \x.. get this far only in non-UTF mode. */ + Depending of how we got it, the encoding enum could be set to tell + us how to encode it, otherwise follow the utf modifier. 
*/ #ifdef SUPPORT_PCRE2_8 if (test_mode == PCRE8_MODE) { - if (utf) - { - if (c > 0x7fffffff) - { - fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff " - "and so cannot be converted to UTF-8\n", c); - return PR_OK; - } - q8 += ord2utf8(c, q8); - } - else + if (encoding == FORCE_RAW || !(utf || encoding == FORCE_UTF)) { if (c > 0xffu) { @@ -7205,45 +7569,71 @@ while ((c = *p++) != 0) } *q8++ = (uint8_t)c; } + else + { + if (c > 0x7fffffff) + { + fprintf(outfile, "** Character \\N{U+%x} is greater than 0x7fffffff " + "and therefore cannot be encoded as UTF-8\n", c); + return PR_OK; + } + else if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT) + fprintf(outfile, "** Warning: character \\N{U+%x} is greater than " + "0x%x and should not be encoded as UTF-8\n", + c, MAX_UTF_CODE_POINT); + q8 += ord2utf8(c, q8); + } } #endif #ifdef SUPPORT_PCRE2_16 if (test_mode == PCRE16_MODE) { - if (utf) + /* Unlike the 8-bit code, there are no forced raw suggestions for the + 16-bit mode, so assume raw unless utf is preferred */ + + if (!(encoding == FORCE_UTF || utf)) { - if (c > 0x10ffffu) + if (c > 0xffffu) { - fprintf(outfile, "** Failed: character \\x{%x} is greater than " - "0x10ffff and so cannot be converted to UTF-16\n", c); + fprintf(outfile, "** Character \\x{%x} is greater than 0xffff " + "and UTF-16 mode is not enabled.\n", c); + fprintf(outfile, "** Truncation will probably give the wrong " + "result.\n"); + } + *q16++ = (uint16_t)c; + } + else + { + if (c > MAX_UTF_CODE_POINT) + { + fprintf(outfile, "** Failed: character \\N{U+%x} is greater than " + "0x%x and therefore cannot be encoded as UTF-16\n", + c, MAX_UTF_CODE_POINT); return PR_OK; } else if (c >= 0x10000u) { - c-= 0x10000u; + c -= 0x10000u; *q16++ = 0xD800 | (c >> 10); *q16++ = 0xDC00 | (c & 0x3ff); } else - *q16++ = c; - } - else - { - if (c > 0xffffu) { - fprintf(outfile, "** Character \\x{%x} is greater than 0xffff " - "and UTF-16 mode is not enabled.\n", c); - fprintf(outfile, "** 
Truncation will probably give the wrong " - "result.\n"); + if (encoding == FORCE_UTF && 0xe000u > c && c >= 0xd800u) + fprintf(outfile, "** Warning: character \\N{U+%x} is a surrogate " + "and should not be encoded as UTF-16\n", c); + *q16++ = c; } - - *q16++ = (uint16_t)c; } } #endif #ifdef SUPPORT_PCRE2_32 if (test_mode == PCRE32_MODE) { + if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT) + fprintf(outfile, "** Warning: character \\N{U+%x} is greater than " + "0x%x and should not be encoded as UTF-32\n", + c, MAX_UTF_CODE_POINT); *q32++ = c; } #endif @@ -7279,7 +7669,7 @@ for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++) } } -if (pat_patctl.replacement[0] != 0) +if (dat_datctl.replacement[0] != 0) { if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 && (dat_datctl.control & CTL_NULLCONTEXT) != 0) @@ -7288,6 +7678,13 @@ if (pat_patctl.replacement[0] != 0) return PR_OK; } + if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CASE_CALLOUT) != 0 && + (dat_datctl.control & CTL_NULLCONTEXT) != 0) + { + fprintf(outfile, "** Replacement case callouts are not supported with null_context.\n"); + return PR_OK; + } + if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) fprintf(outfile, "** Ignored with replacement text: allcaptures\n"); } @@ -7630,7 +8027,7 @@ if (dat_datctl.replacement[0] != 0) if (*pr == '[') { PCRE2_SIZE n = 0; - while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0; + while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + (c - CHAR_0); if (*pr++ != ']') { fprintf(outfile, "Bad buffer size in replacement string\n"); @@ -7715,6 +8112,15 @@ if (dat_datctl.replacement[0] != 0) PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */ } + if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CASE_CALLOUT) != 0) + { + PCRE2_SET_SUBSTITUTE_CASE_CALLOUT(dat_context, substitute_case_callout_function, NULL); + } + else + { + PCRE2_SET_SUBSTITUTE_CASE_CALLOUT_NULL(dat_context); /* No callout */ + } + /* There is a special 
option to set the replacement to NULL in order to test that case. */ @@ -7835,6 +8241,7 @@ for (gmatched = 0;; gmatched++) if ((dat_datctl.control & (CTL_FINDLIMITS|CTL_FINDLIMITS_NOHEAP)) != 0) { capcount = 0; /* This stops compiler warnings */ + (void)capcount; if ((dat_datctl.control & CTL_FINDLIMITS_NOHEAP) == 0 && (FLD(compiled_code, executable_jit) == NULL || @@ -7915,13 +8322,20 @@ for (gmatched = 0;; gmatched++) /* The result of the match is now in capcount. First handle a successful match. If pp was forced to be NULL (to test NULL handling) it will have been treated as an empty string if the length was zero. So re-create that for - outputting. */ + outputting. Don't just point to "" because that leads to a "loss of const" + warning. */ if (capcount >= 0) { - int i; - - if (pp == NULL) pp = (uint8_t *)""; + if (pp == NULL) + { +#ifdef SUPPORT_VALGRIND + /* Mark the start of dbuffer addressable again. */ + VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, 1); +#endif + pp = dbuffer; + pp[0] = 0; + } if (capcount > (int)oveccount) /* Check for lunatic return value */ { @@ -7946,11 +8360,11 @@ for (gmatched = 0;; gmatched++) fprintf(outfile, "** PCRE2 error: flag not set after copy_matched_subject\n"); - if (CASTFLD(void *, match_data, subject) == pp) + if (CASTFLD(const void *, match_data, subject) == pp) fprintf(outfile, "** PCRE2 error: copy_matched_subject has not copied\n"); - if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0) + if (memcmp(CASTFLD(const void *, match_data, subject), pp, ulen) != 0) fprintf(outfile, "** PCRE2 error: copy_matched_subject mismatch\n"); } @@ -7997,7 +8411,7 @@ for (gmatched = 0;; gmatched++) /* Output the captured substrings. Note that, for the matched string, the use of \K in an assertion can make the start later than the end. 
*/ - for (i = 0; i < 2*capcount; i += 2) + for (int i = 0; i < 2*capcount; i += 2) { PCRE2_SIZE lleft, lmiddle, lright; PCRE2_SIZE start = ovector[i]; @@ -8128,7 +8542,7 @@ for (gmatched = 0;; gmatched++) TESTFLD(match_data, mark, !=, NULL)) { fprintf(outfile, "MK: "); - PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile); + PCHARSV(CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); fprintf(outfile, "\n"); } @@ -8160,7 +8574,7 @@ for (gmatched = 0;; gmatched++) TESTFLD(match_data, mark, !=, NULL)) { fprintf(outfile, ", mark="); - PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf, + PCHARS(rubriclength, CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); rubriclength += 7; } @@ -8176,9 +8590,8 @@ for (gmatched = 0;; gmatched++) if (backlength != 0) { - int i; - for (i = 0; i < rubriclength; i++) fprintf(outfile, " "); - for (i = 0; i < backlength; i++) fprintf(outfile, "<"); + for (int i = 0; i < rubriclength; i++) fprintf(outfile, " "); + for (int i = 0; i < backlength; i++) fprintf(outfile, "<"); fprintf(outfile, "\n"); } @@ -8259,7 +8672,7 @@ for (gmatched = 0;; gmatched++) TESTFLD(match_data, mark, !=, NULL)) { fprintf(outfile, ", mark = "); - PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile); + PCHARSV(CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); } if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) fprintf(outfile, " (JIT)"); @@ -8506,6 +8919,7 @@ printf(" bsr \\R type [ANYCRLF, ANY]\n"); printf(" ebcdic compiled for EBCDIC character code [0,1]\n"); printf(" ebcdic-nl NL code if compiled for EBCDIC\n"); printf(" jit just-in-time compiler supported [0, 1]\n"); +printf(" jitusable test JIT usability [0, 1, 2, 3]\n"); printf(" linksize internal link size [2, 3, 4]\n"); printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"); printf(" pcre2-8 8 bit library support enabled [0, 1]\n"); @@ -8563,7 +8977,7 @@ if (arg != NULL && arg[0] != CHAR_MINUS) if (i 
>= COPTLISTCOUNT) { - fprintf(stderr, "** Unknown -C option '%s'\n", arg); + fprintf(stderr, "** Unknown -C option \"%s\"\n", arg); return 0; } @@ -8593,6 +9007,19 @@ if (arg != NULL && arg[0] != CHAR_MINUS) (void)PCRE2_CONFIG(coptlist[i].value, &optval); print_newline_config(optval, TRUE); break; + + case CONF_JU: + SET(compiled_code, NULL); + PCRE2_JIT_COMPILE(yield, compiled_code, PCRE2_JIT_TEST_ALLOC); + switch(yield) + { + case 0: break; + case PCRE2_ERROR_NOMEMORY: yield = 1; break; + case PCRE2_ERROR_JIT_UNSUPPORTED: yield = 2; break; + default: yield = 3; break; + } + printf("%d\n", yield); + break; } /* For VMS, return the value by setting a symbol, for certain values only. This @@ -8663,9 +9090,30 @@ else printf(" No Unicode support\n"); (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval); if (optval != 0) { - printf(" Just-in-time compiler support: "); + printf(" Just-in-time compiler support\n"); + printf(" Architecture: "); print_jit_target(stdout); printf("\n"); + + printf(" Can allocate executable memory: "); + SET(compiled_code, NULL); + PCRE2_JIT_COMPILE(yield, compiled_code, PCRE2_JIT_TEST_ALLOC); + switch(yield) + { + case 0: + printf("Yes\n"); + break; + + case PCRE2_ERROR_NOMEMORY: + printf("No (so cannot work)\n"); + break; + + default: + printf("\n** Unexpected return %d from " + "pcre2_jit_compile(NULL, PCRE2_JIT_TEST_ALLOC)\n", yield); + printf("** Should not occur\n"); + break; + } } else { @@ -8918,7 +9366,7 @@ for (i = 0; i < MODLISTCOUNT; i++) is_pattern = FALSE; break; - default: printf("** Unknown type for modifier '%s'\n", m->name); + default: printf("** Unknown type for modifier \"%s\"\n", m->name); /* Fall through */ case MOD_PD: /* Pattern or subject */ case MOD_PDP: /* As PD, OK for Perl-compatible test */ @@ -9313,7 +9761,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) else { - fprintf(stderr, "** Unknown or malformed option '%s'\n", arg); + fprintf(stderr, "** Unknown or malformed option \"%s\"\n", arg); usage(); 
yield = 1; goto EXIT; @@ -9371,7 +9819,7 @@ least 128 code units, because it is used for retrieving error messages. */ errcode = strtol(arg_error, &endptr, 10); if (*endptr != 0 && *endptr != CHAR_COMMA) { - fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error); + fprintf(stderr, "** \"%s\" is not a valid error number list\n", arg_error); yield = 1; goto EXIT; } @@ -9402,7 +9850,8 @@ least 128 code units, because it is used for retrieving error messages. */ if (*endptr == 0) goto EXIT; arg_error = endptr + 1; } - /* Control never reaches here */ + + PCRE2_UNREACHABLE(); /* Control never reaches here */ } /* End of -error handling */ /* Initialize things that cannot be done until we know which test mode we are @@ -9495,7 +9944,7 @@ if (argc > 1 && strcmp(argv[op], "-") != 0) infile = fopen(argv[op], INPUT_MODE); if (infile == NULL) { - printf("** Failed to open '%s': %s\n", argv[op], strerror(errno)); + printf("** Failed to open \"%s\": %s\n", argv[op], strerror(errno)); yield = 1; goto EXIT; } @@ -9510,7 +9959,7 @@ if (argc > 2) outfile = fopen(argv[op+1], OUTPUT_MODE); if (outfile == NULL) { - printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno)); + printf("** Failed to open \"%s\": %s\n", argv[op+1], strerror(errno)); yield = 1; goto EXIT; } @@ -9651,7 +10100,7 @@ free(dbuffer); free(pbuffer8); free(dfa_workspace); free(tables3); -PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables); +PCRE2_MAKETABLES_FREE(general_context, (const void *)locale_tables); PCRE2_MATCH_DATA_FREE(match_data); SUB1(pcre2_code_free, compiled_code); diff --git a/testdata/grepinput b/testdata/grepinput index 1e2ceb4..1a0a9c0 100644 --- a/testdata/grepinput +++ b/testdata/grepinput @@ -617,6 +617,25 @@ match 5: Rhubarb Custard Tart +zxc +cvb +bnm +asd +qwe +ert +tyu +uio +ggg +asd +dfg +ghj +jkl +abx +def +ghi +xyz + + PUT NEW DATA ABOVE THIS LINE. 
============================= diff --git a/testdata/grepinputBad8 b/testdata/grepinputBad8 new file mode 100644 index 0000000..af52f08 --- /dev/null +++ b/testdata/grepinputBad8 @@ -0,0 +1 @@ +Aက€CD Z diff --git a/testdata/grepinputBad8_Trail b/testdata/grepinputBad8_Trail new file mode 100644 index 0000000..be1eed8 --- /dev/null +++ b/testdata/grepinputBad8_Trail @@ -0,0 +1 @@ +abcð \ No newline at end of file diff --git a/testdata/grepinputC.bz2 b/testdata/grepinputC.bz2 index 457047ad79db1a54081ce7d7acb9c6e21a801d27..dea7158beaef7bee047eec13b33df37d7471b6af 100644 GIT binary patch literal 315 zcmV-B0mS}7T4*^jL0KkKSs8a-v;Y7TsRm&dfq*`ROWDO^#o@Fh=@GXv$K0?Kvx-q z0K7g3(~3djgkW4nJzk?)p;N?3t8Hp1p==Ab3=bvR5jy z@AV;C!MsBV&K9)Z(qCZ8rL>=|pl;6ir*kjvxLZ}S2yNYt#g!GgdY^UtvTP&?P)%BK@n&rn7%IEaJII!85qAQC4) zatXr7o{Uma0&sCLS6CoposBnRD!bX%A>lmGbQglzso?4AjD6YtfL|gZ(?WQL`n2tg z^RjuIzHM9RwLuNM*zR3XUx%UF4ep*r#^^r^zvAvl LrwS4caGJGXhzXKq diff --git a/testdata/grepinputC.gz b/testdata/grepinputC.gz index c7ff390e08374563b11de5e5108ab79d9a54fd6d..b1c9fcd2b887fe62dddd52a64ec87e8cba3a743a 100644 GIT binary patch literal 313 zcmV-90ml9xiwFoRFGgnq17~t&aA|IEb#y}jeNr)Q!!Qio`3gQD$j~2j>)62`P+ZC; zDod_J$MNrbl!Hz|0mCAt;~ja@Fl%Fcft!pae{s|#Tq0{;H{)0;+bq0}RoQ+3JYx?Gvo^*TxXD=ZCr3@fC9>vqGmfRQ&BE(gWva5WNwan* zdY39**AIi4)XZTDnsN4VRAskY2S@10o-lhG+_68#W#EAHxe0bESyLo@( zEaTTTN8Xl)RHrIg$c{obR}qS3s}yw?YWhP>cF80<%|=-r=xOS^c6c)=(`tg;E3ZIf zUqNuK4)0MX^1!)z0FrE{y8y>}W4KF0zwPgG+}oG_56>6iMyJs10>BIN1Ah*8`UO_| JRoNy2006TFl4k$_ diff --git a/testdata/grepinputUN b/testdata/grepinputUN new file mode 100644 index 0000000..dd4a8cc --- /dev/null +++ b/testdata/grepinputUN @@ -0,0 +1,2 @@ +abcሴdef +xyz \ No newline at end of file diff --git a/testdata/grepinputv b/testdata/grepinputv index 366d4fb..029e2bc 100644 --- a/testdata/grepinputv +++ b/testdata/grepinputv @@ -7,3 +7,4 @@ The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal A buried feline in the syndicate +trailing spaces diff 
--git a/testdata/grepinputx b/testdata/grepinputx index 730cc8a..f4a5f10 100644 --- a/testdata/grepinputx +++ b/testdata/grepinputx @@ -1,4 +1,4 @@ -This is a second file of input for the pcregrep tests. +This is a second file of input for the pcre2grep tests. Here is the pattern again. diff --git a/testdata/grepnot.bz2 b/testdata/grepnot.bz2 index 730cc8a..f4a5f10 100644 --- a/testdata/grepnot.bz2 +++ b/testdata/grepnot.bz2 @@ -1,4 +1,4 @@ -This is a second file of input for the pcregrep tests. +This is a second file of input for the pcre2grep tests. Here is the pattern again. diff --git a/testdata/grepoutput b/testdata/grepoutput index d9233c2..f4e8996 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -10,7 +10,7 @@ RC=0 7:PATTERN at the start of a line. 8:In the middle of a line, PATTERN appears. 10:This pattern is in lower case. -623:Check up on PATTERN near the end. +642:Check up on PATTERN near the end. RC=0 ---------------------------- Test 4 ------------------------------ 4 @@ -19,7 +19,7 @@ RC=0 ./testdata/grepinput:7:PATTERN at the start of a line. ./testdata/grepinput:8:In the middle of a line, PATTERN appears. ./testdata/grepinput:10:This pattern is in lower case. -./testdata/grepinput:623:Check up on PATTERN near the end. +./testdata/grepinput:642:Check up on PATTERN near the end. ./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx:5:Pattern ./testdata/grepinputx:42:This line contains pattern not on a line by itself. @@ -28,7 +28,7 @@ RC=0 7:PATTERN at the start of a line. 8:In the middle of a line, PATTERN appears. 10:This pattern is in lower case. -623:Check up on PATTERN near the end. +642:Check up on PATTERN near the end. 3:Here is the pattern again. 5:Pattern 42:This line contains pattern not on a line by itself. 
@@ -45,7 +45,7 @@ RC=0 ---------------------------- Test 10 ----------------------------- RC=1 ---------------------------- Test 11 ----------------------------- -1:This is a second file of input for the pcregrep tests. +1:This is a second file of input for the pcre2grep tests. 2: 4: 5:Pattern @@ -324,12 +324,12 @@ RC=0 ./testdata/grepinput-9- ./testdata/grepinput:10:This pattern is in lower case. -- -./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE. -./testdata/grepinput-621-============================= -./testdata/grepinput-622- -./testdata/grepinput:623:Check up on PATTERN near the end. +./testdata/grepinput-639-PUT NEW DATA ABOVE THIS LINE. +./testdata/grepinput-640-============================= +./testdata/grepinput-641- +./testdata/grepinput:642:Check up on PATTERN near the end. -- -./testdata/grepinputx-1-This is a second file of input for the pcregrep tests. +./testdata/grepinputx-1-This is a second file of input for the pcre2grep tests. ./testdata/grepinputx-2- ./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx-4- @@ -349,8 +349,8 @@ RC=0 ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24KiB long. ./testdata/grepinput-13- -- -./testdata/grepinput:623:Check up on PATTERN near the end. -./testdata/grepinput-624-This is the last line of this file. +./testdata/grepinput:642:Check up on PATTERN near the end. +./testdata/grepinput-643-This is the last line of this file. -- ./testdata/grepinputx:3:Here is the pattern again. 
./testdata/grepinputx-4- @@ -436,8 +436,11 @@ RC=0 RC=0 ---------------------------- Test 46 ------------------------------ pcre2grep: Error in 1st command-line regex at offset 8: unmatched closing parenthesis +RC=2 pcre2grep: Error in 2nd command-line regex at offset 9: missing closing parenthesis +RC=2 pcre2grep: Error in 3rd command-line regex at offset 9: missing terminating ] for character class +RC=2 pcre2grep: Error in 4th command-line regex at offset 9: missing terminating ] for character class RC=2 ---------------------------- Test 47 ------------------------------ @@ -464,6 +467,7 @@ The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal A buried feline in the syndicate +trailing spaces RC=0 ---------------------------- Test 52 ------------------------------ fox jumps @@ -492,7 +496,10 @@ RC=0 ./testdata/grepinput:456 ./testdata/grepinput3:0 ./testdata/grepinput8:0 +./testdata/grepinputBad8:0 +./testdata/grepinputBad8_Trail:0 ./testdata/grepinputM:0 +./testdata/grepinputUN:0 ./testdata/grepinputv:1 ./testdata/grepinputx:0 RC=0 @@ -554,7 +561,7 @@ RC=0 pear RC=0 ---------------------------- Test 69 ----------------------------- -1:This is a second file of input for the pcregrep tests. +1:This is a second file of input for the pcre2grep tests. 
2: 4: 5:Pattern @@ -721,6 +728,8 @@ RC=0 ---------------------------- Test 96 ----------------------------- ./testdata/grepinput3 ./testdata/grepinput8 +./testdata/grepinputBad8 +./testdata/grepinputBad8_Trail ./testdata/grepinputx RC=0 ---------------------------- Test 97 ----------------------------- @@ -832,7 +841,10 @@ RC=0 testdata/grepinput:469 testdata/grepinput3:0 testdata/grepinput8:0 +testdata/grepinputBad8:0 +testdata/grepinputBad8_Trail:0 testdata/grepinputM:2 +testdata/grepinputUN:0 testdata/grepinputv:3 testdata/grepinputx:6 TOTAL:480 @@ -851,7 +863,10 @@ RC=0 469 0 0 +0 +0 2 +0 3 6 480 @@ -859,6 +874,9 @@ RC=0 ---------------------------- Test 118 ----------------------------- testdata/grepinput3 testdata/grepinput8 +testdata/grepinputBad8 +testdata/grepinputBad8_Trail +testdata/grepinputUN RC=0 ---------------------------- Test 119 ----------------------------- 123 @@ -875,6 +893,10 @@ RC=0 ./testdata/grepinput:a binary zero:zeroa ./testdata/grepinput:the binary zero.:zerothe. RC=0 +./testdata/grepinput:the binary zero.:zerothe. +./testdata/grepinput:a binary zero:zeroa +./testdata/grepinput:the binary zero.:zerothe. +RC=0 the binary zero.: RC=0 pcre2grep: Error in output text at offset 2: decimal number expected @@ -1169,6 +1191,7 @@ The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal A buried feline in the syndicate +trailing spaces RC=0 ---------------------------- Test 146 ----------------------------- (standard input):A123B @@ -1225,13 +1248,14 @@ Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] Type "pcre2grep --help" for more information and the long options. 
RC=2 ---------------------------- Test 150 ----------------------------- -pcre2grep: Failed to set locale badlocale (obtained from LC_CTYPE) +pcre2grep: Failed to set locale locale.bad (obtained from LC_CTYPE) RC=2 ---------------------------- Test 151 ----------------------------- The quick brown The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal +RC=0 ---------------------------- Test 152 ----------------------------- 24:four 25-five @@ -1253,3 +1277,56 @@ RC=0 36-sixteen 37-seventeen RC=0 +---------------------------- Test 154 ----------------------------- +RC=1 +---------------------------- Test 155 ----------------------------- +RC=1 +---------------------------- Test 156 ----------------------------- +The quick brown +fox jumps +over the lazy dog. +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +A buried feline in the syndicate +trailing spaces +RC=0 +---------------------------- Test 157 ----------------------------- +RC=0 +---------------------------- Test 158 ----------------------------- +trailing spaces +RC=0 +---------------------------- Test 159 ----------------------------- +trailing spaces +RC=0 +---------------------------- Test 160 ----------------------------- +622-bnm +623-asd +624-qwe +625:ert +626-tyu +627-uio +628-ggg +629-asd +630-dfg +631-ghj +632:jkl +633-abx +634-def +635-ghi +RC=0 +621-cvb +622-bnm +623-asd +624-qwe +625:ert +626-tyu +627-uio +628-ggg +629-asd +630:dfg +631-ghj +632-jkl +RC=0 diff --git a/testdata/grepoutputC b/testdata/grepoutputC index 56bd283..725a6c5 100644 --- a/testdata/grepoutputC +++ b/testdata/grepoutputC @@ -1,3 +1,4 @@ +--- Test 1 --- Arg1: [T] [he ] [ ] Arg2: |T| () () (0) The quick brown Arg1: [T] [his] [s] Arg2: |T| () () (0) @@ -10,6 +11,8 @@ Arg1: [T] [he ] [ ] Arg2: |T| () () (0) The caterpillar sat on the mat Arg1: [T] [he ] [ ] 
Arg2: |T| () () (0) The snowcat is not an animal +RC=0 +--- Test 2 --- Arg1: [qu] [qu] The quick brown Arg1: [ t] [ t] @@ -22,6 +25,8 @@ Arg1: [ca] [ca] The caterpillar sat on the mat Arg1: [sn] [sn] The snowcat is not an animal +RC=0 +--- Test 3 --- 0:T The quick brown 0:T @@ -34,6 +39,8 @@ The word is cat in this line The caterpillar sat on the mat 0:T The snowcat is not an animal +RC=0 +--- Test 4 --- 0:T The quick brown @@ -52,11 +59,16 @@ The caterpillar sat on the mat 0:T The snowcat is not an animal +RC=0 +--- Test 5 --- T T T T T T +RC=1 +--- Test 6 --- 0:T:AA The quick brown +RC=0 diff --git a/testdata/grepoutputCN b/testdata/grepoutputCN index aef1a3d..ebab646 100644 --- a/testdata/grepoutputCN +++ b/testdata/grepoutputCN @@ -1,15 +1,20 @@ +--- Test 1 --- The quick brown This time it jumps and jumps and jumps. This line contains \E and (regex) *meta* [characters]. The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal +RC=0 +--- Test 2 --- The quick brown This time it jumps and jumps and jumps. This line contains \E and (regex) *meta* [characters]. The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal +RC=0 +--- Test 3 --- 0:T The quick brown 0:T @@ -22,17 +27,24 @@ The word is cat in this line The caterpillar sat on the mat 0:T The snowcat is not an animal +RC=0 +--- Test 4 --- The quick brown This time it jumps and jumps and jumps. This line contains \E and (regex) *meta* [characters]. 
The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal +RC=0 +--- Test 5 --- T T T T T T +RC=1 +--- Test 6 --- 0:T:AA The quick brown +RC=0 diff --git a/testdata/grepoutputCNU b/testdata/grepoutputCNU index 2fbfba0..550eeb9 100644 --- a/testdata/grepoutputCNU +++ b/testdata/grepoutputCNU @@ -1,3 +1,4 @@ +--- Test 1 --- 0:¦ The quick brown 0:¦ @@ -10,9 +11,12 @@ The word is cat in this line The caterpillar sat on the mat 0:¦ The snowcat is not an animal +RC=0 +--- Test 2 --- The quick brown This time it jumps and jumps and jumps. This line contains \E and (regex) *meta* [characters]. The word is cat in this line The caterpillar sat on the mat The snowcat is not an animal +RC=0 diff --git a/testdata/grepoutputCU b/testdata/grepoutputCU index ed8f491..e874923 100644 --- a/testdata/grepoutputCU +++ b/testdata/grepoutputCU @@ -1,3 +1,4 @@ +--- Test 1 --- 0:¦ The quick brown 0:¦ @@ -10,6 +11,8 @@ The word is cat in this line The caterpillar sat on the mat 0:¦ The snowcat is not an animal +RC=0 +--- Test 2 --- 0:¦ The quick brown @@ -28,3 +31,4 @@ The caterpillar sat on the mat 0:¦ The snowcat is not an animal +RC=0 diff --git a/testdata/grepoutputN b/testdata/grepoutputN index b39654f..4ee87e6 100644 --- a/testdata/grepoutputN +++ b/testdata/grepoutputN @@ -1,27 +1,42 @@ ---------------------------- Test N1 ------------------------------ -1:abc 2:def 1-abc 2:def ---------------------------- Test N2 ------------------------------ +1:abc 2:def RC=0 +1-abc 2:def RC=0 +---------------------------- Test N2 ------------------------------ 1:abc def 2:ghi jkl +RC=0 1-abc def 2:ghi jkl +RC=0 ---------------------------- Test N3 ------------------------------ 2:def 3: ghi -jkl ---------------------------- Test N4 ------------------------------ +jkl RC=0 +---------------------------- Test N4 ------------------------------ 2:ghi jkl +RC=0 ---------------------------- Test N5 ------------------------------ 1:abc 2:def 3:ghi 4:jkl +RC=0 1-abc 2:def 
+RC=0 ---------------------------- Test N6 ------------------------------ 1:abc 2:def 3:ghi 4:jkl +RC=0 3-ghi 4:jkl +RC=0 ---------------------------- Test N7 ------------------------------ -2:abc@3:def@1-xyz@2:abc@3:def@ +2:abc@3:def@RC=0 +1-xyz@2:abc@3:def@RC=0 +---------------------------- Test N8 ------------------------------ +1:abcð +RC=0 + diff --git a/testdata/grepoutputUN b/testdata/grepoutputUN index ae5eb7a..ff318cc 100644 --- a/testdata/grepoutputUN +++ b/testdata/grepoutputUN @@ -1,3 +1,4 @@ ----------------------------- Test UN1 ------------------------------ -1:abcሴdef +---------------------------- Test UN2 ------------------------------ +1:abcð +RC=0 diff --git a/testdata/testinput1 b/testdata/testinput1 index 00e76da..76a6308 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5087,6 +5087,15 @@ name)/mark \= Expect no match D +/(*COMMIT)ABC/no_start_optimize + ABC +\= Expect no match + DEFABC + +/(*COMMIT)ABC/ + ABC + DEFABC + # This should fail, as the skip causes a bump to offset 3 (the skip). 
/A(*MARK:A)A+(*SKIP)(B|Z) | AC/x,mark @@ -5610,9 +5619,15 @@ name)/mark /^ (?:(?A)|(?'B'B)(?A)) (?('A')x) (?()y)$/x,dupnames Ax BAxy - -/^A\xZ/ - A\0Z + +/^A\xBz/ + A\x{0B}z + +/^A\xABz/ + A\x{AB}z + +/^A\xABCz/ + A\x{AB}Cz /^A\o{123}B/ A\123B @@ -5786,12 +5801,6 @@ ef) x/x,mark /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ -/[s[:digit:]\E-H]+/ - s09-H - -/[s[:digit:]\Q\E-H]+/ - s09-H - /a+(?:|b)a/ aaaa @@ -6415,7 +6424,19 @@ ef) x/x,mark /\214748364/ >\x{8c}748364< - + +# smaller than GROUP_MAX +/\21300/ + \x8b00 + +# larger than GROUP_MAX +/\213000/ + \x8b000 + +# larger than INT_MAX +/\21300000000/ + \x8b00000000 + /a{65536/ >a{65536< @@ -6654,4 +6675,402 @@ $/x /(?\777< + abc + +/abc/substitute_extended,replace=>\o{012345}< + abc + +# Character range merging tests + +/[\x{100}-\x{200}\H\x{8000}-\x{9000}]/B + +/[\x{100}-\x{200}\V\x{8000}-\x{9000}]/B + +/[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B + +/[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B + +/[\x00-\x2f\x11-\xff]*?!/B + abcd!e + +/i/turkish_casing + +# Character list tests + +/([\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}?).#/B + \x{9001}\x{9007}\x{8000}\x{ffff}\x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}# + +/([\x{3000}\x{3001}\x{3003}\x{3004}\x{3006}\x{3007}\x{8000}-\x{ffff}\x{100001}\x{100002}\x{100004}\x{100005}\x{100007}\x{100008}\x{10000a}\x{10000b}\x{80000000}-\x{ffffffff}]{5,}).#/B + \x{2fff}\x{3002}\x{7fff}\x{100000}\x{7fffffff}\x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}# + +/([^\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}]+).#/B + 
\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}# + \x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}# + \x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}# + \x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}\x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}# + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +# META_BIGVALUE tests + +/\x{80000000}/B + \x{80000000} +\= Expect no match + \x{7fffffff} + \x{80000001} + +/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B + \x{80000002} + \x{8fffffff} +\= Expect no match + \x{7fffffff} + \x{90000000} + +/\x{80000000}/B,alt_extended_class + \x{80000000} +\= Expect no match + \x{7fffffff} + \x{80000001} + +/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B,alt_extended_class + \x{80000002} + \x{8fffffff} +\= Expect no match + \x{7fffffff} + \x{90000000} + +/[\x{80000000}-\x{8000000f}--\x{80000002}]/B,alt_extended_class + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +/[[\x{80000000}-\x{8000000f}]--[\x{80000002}]]/B,alt_extended_class + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +# META_BIGVALUE tests + +/(?[[\x{80000000}-\x{8000000f}]+\x{8fffffff}])/B + \x{80000002} + \x{8fffffff} +\= Expect no match + \x{7fffffff} + \x{90000000} + +/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +# -------------- + # End of testinput11 diff --git a/testdata/testinput12 b/testdata/testinput12 index 85550c3..9763f73 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -56,9 +56,6 @@ 
\x{c0} \x{f0} -/Ā{3,4}/IB,utf - \x{100}\x{100}\x{100}\x{100\x{100} - /(\x{100}+|x)/IB,utf /(\x{100}*a|x)/IB,utf @@ -468,6 +465,27 @@ /A\z/utf,match_invalid_utf A\x{df00}\n +/ab$/match_invalid_utf +\= Expect no match + ab\x{df00}cde + +/ab\z/match_invalid_utf +\= Expect no match + ab\x{df00}cde + +/ab\Z/match_invalid_utf +\= Expect no match + ab\x{df00}cde + +/(..)(*scs:(1)ab\z)/match_invalid_utf + ab\x{df00}cde + +/(..)(*scs:(1)ab\Z)/match_invalid_utf + ab\x{df00}cde + +/(..)(*scs:(1)ab$)/match_invalid_utf + ab\x{df00}cde + # ---------------------------------------------------- /(*UTF)(?=\x{123})/I @@ -489,9 +507,9 @@ # ---------------------------------------------------- # UCP and casing tests -/\x{120}/i,I +/\x{120}/iI -/\x{c1}/i,I,ucp +/\x{c1}/iI,ucp /[\x{120}\x{121}]/iB,ucp @@ -561,8 +579,90 @@ \= Expect no match \x{17f} +/(.) \1/i,ucp + i I + +/(.) \1/i,ucp,turkish_casing +\= Expect no match + i I + +/(.) \1/i,ucp + i I + \x{212a} k +\= Expect no match + i \x{0130} + \x{0131} I + +/(.) \1/i,ucp,turkish_casing + \x{212a} k + i \x{0130} + \x{0131} I +\= Expect no match + i I + +/(.) 
(?r:\1)/i,ucp,turkish_casing + i I +\= Expect no match + i \x{0130} + \x{0131} I + \x{212a} k + +/[a-z][^i]I/ucp,turkish_casing + bII + b\x{0130}I + b\x{0131}I +\= Expect no match + biI + +/[a-z][^i]I/i,ucp,turkish_casing + b\x{0131}I + bII +\= Expect no match + biI + b\x{0130}I + +/[a-z](?r:[^i])I/i,ucp,turkish_casing + b\x{0131}I + b\x{0130}I +\= Expect no match + bII + biI + +/b(?r:[\x{00FF}-\x{FFEE}])/i,ucp,turkish_casing + b\x{0130} + b\x{0131} + B\x{212a} +\= Expect no match + bi + bI + bk + +/[\x60-\x7f]/i,ucp,turkish_casing + i +\= Expect no match + I + +/[\x60-\xc0]/i,ucp,turkish_casing + i +\= Expect no match + I + +/[\x80-\xc0]/i,ucp,turkish_casing +\= Expect no match + i + I + # ---------------------------------------------------- +/b[\x{00FF}-\x{FFEE}]/ir + b\x{0130} + b\x{0131} + B\x{212a} +\= Expect no match + bi + bI + bk + # Quantifier after a literal that has the value of META_ACCEPT (not UTF). This # fails in 16-bit mode, but is OK for 32-bit. @@ -601,6 +701,15 @@ \= Expect no match Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +/[sk](?r:[sk])[sk]/Bi,ucp + SKS + sks + \x{212a}S\x{17f} + \x{17f}K\x{212a} +\= Expect no match + s\x{212a}s + K\x{17f}K + # --------------------------------------------------------- # End of testinput12 diff --git a/testdata/testinput2 b/testdata/testinput2 index b90489a..b162c53 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -831,6 +831,19 @@ /x++/IB +# For comparison with the following test, which disables auto-possessification +# In this regex, x+ should be converted to x++ +/x+y/B,auto_possess + +# In this regex, x+ should not be converted to x++ +/x+y/B,auto_possess_off + +# Also in this regex, x+ should not be converted to x++ +/x+y/B,optimization_none + +# In this one too, x+ should not be converted to x++ +/x+y/B,no_auto_possess + /x{1,3}+/B,no_auto_possess /x{1,3}+/Bi,no_auto_possess @@ -839,6 +852,8 @@ /[^x]{1,3}+/Bi,no_auto_possess +/x{1,3}+/IB,auto_possess_off + /(x)*+/IB /^(\w++|\s++)*$/I @@ -916,6 
+931,12 @@ /\N{25,ab}/ +/[\N]/ + +/[\N{4}]/ + +/[\N{name}]/ + /a{1,3}b/ungreedy ab @@ -2401,6 +2422,9 @@ \= Expect no match cat +/cat[]/B,allow_empty_class + cat\=ph + /(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames cat @@ -2448,7 +2472,6 @@ ab /a[]*+b/allow_empty_class,match_unset_backref,dupnames -\= Expect no match ab /a[^]b/allow_empty_class,match_unset_backref,dupnames @@ -2862,6 +2885,13 @@ b"11111 a"11111 +/(?:a(?[0-5])|b(?[4-7]))c(?()d|e)/B,dupnames + a4cd + b4cd +\= Expect no match + a6cd + a6ce + /^(?|(a)(b)(c)(?d)|(?e)) (?('D')X|Y)/IBx,dupnames abcdX eX @@ -3945,6 +3975,12 @@ no reduction Alfred Winifred +/[[:<:]]+red/B + little red riding hood + red is a colour +\= Expect no match + Alfred + /[a[:<:]] should give error/ /(?=ab\K)/aftertext,allow_lookaround_bsk @@ -3976,6 +4012,10 @@ /\xthing/ +/^A\xZ/ + +/^A\x/ + /\x{}/ /\x{whatever}/ @@ -4049,10 +4089,16 @@ /(?(VERSION=10.101)yes|no)/ +# We should see the starting code unit, required code unit, and minimum length set for this regex: /abcd/I +# None of the following three should have the starting code unit, required code unit, and minimum length set: /abcd/I,no_start_optimize +/abcd/I,start_optimize_off + +/abcd/I,optimization_none + /(|ab)*?d/I abd xyd @@ -4176,7 +4222,41 @@ /(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} apple lemon blackberry +/"(*:fruit" 00 "juice)apple"/hex,g,replace=${*MARK} + apple lemon blackberry + +/abc/ + 123abc123\=replace=XYZ + 123abc123\=replace=[10]XYZ +\= Expect error + 123abc123\=replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[6]XYZ + 123abc123\=substitute_overflow_length,replace=[1]XYZ + 123abc123\=substitute_overflow_length,replace=[0]XYZ + /abc/ + 123abc123\=replace=XY + 123abc123\=replace=[9]XY + 123abc123\=replace=[9]XY,substitute_literal +\= Expect error + 123abc123\=replace=[8]XY,substitute_overflow_length + 
123abc123\=replace=[8]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[6]XY,substitute_overflow_length + 123abc123\=replace=[6]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[5]XY,substitute_overflow_length + 123abc123\=replace=[5]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[4]XY,substitute_overflow_length + 123abc123\=replace=[4]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[3]XY,substitute_overflow_length + 123abc123\=replace=[3]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[2]XY,substitute_overflow_length + 123abc123\=replace=[2]XY,substitute_overflow_length,substitute_literal + +/abc/substitute_literal + 123abc123\=replace=XYZ + 123abc123\=replace=[10]XYZ +\= Expect error 123abc123\=replace=[9]XYZ 123abc123\=substitute_overflow_length,replace=[9]XYZ 123abc123\=substitute_overflow_length,replace=[6]XYZ @@ -4190,6 +4270,75 @@ 123abc123\=substitute_overflow_length,replace=[1]x$1z 123abc123\=substitute_overflow_length,replace=[0]x$1z +/a(b)c/substitute_extended + ZabcZ\=replace=>\1< + ZabcZ\=replace=>\2< + ZabcZ\=replace=>\8< + ZabcZ\=replace=>${1}< + ZabcZ\=replace=>${ 1 }< + ZabcZ\=replace=>${2}< + ZabcZ\=replace=>${8}< + ZabcZ\=replace=>$<1>< + ZabcZ\=replace=>$< 1 >< + ZabcZ\=replace=>$<2>< + ZabcZ\=replace=>$<8>< + ZabcZ\=replace=>\g<-1>< + ZabcZ\=replace=>\g<0>< + ZabcZ\=replace=>\g<1>< + ZabcZ\=replace=>\g< 1 >< + ZabcZ\=replace=>\g<2>< + ZabcZ\=replace=>\g<8>< + +/(*:pear)apple/substitute_extended + ZappleZ\=replace=>${*MARK}< + ZappleZ\=replace=>$<*MARK>< + ZappleZ\=replace=>\g<*MARK>< + +/a(?b)c/substitute_extended + ZabcZ\=replace=>${named}< + ZabcZ\=replace=>${noexist}< + ZabcZ\=replace=>${}< + ZabcZ\=replace=>${ }< + ZabcZ\=replace=>${ named }< + ZabcZ\=replace=>$< + ZabcZ\=replace=>$< + ZabcZ\=replace=>$<>< + ZabcZ\=replace=>$< >< + ZabcZ\=replace=>$< named >< + ZabcZ\=replace=>\g< + ZabcZ\=replace=>\g< + ZabcZ\=replace=>\g<>< + 
ZabcZ\=replace=>\g< >< + ZabcZ\=replace=>\g< named >< + +/a(b)c/substitute_extended + ZabcZ\=replace=>${1:+ yes : no } + ZabcZ\=replace=>${1:+ \o{100} : \o{100} } + ZabcZ\=replace=>${1:+ \o{Z} : no } + ZabcZ\=replace=>${1:+ yes : \o{Z} } + ZabcZ\=replace=>${1:+ \g<1> : no } + ZabcZ\=replace=>${1:+ yes : \g<1> } + ZabcZ\=replace=>${1:+ \g<1 : no } + ZabcZ\=replace=>${1:+ yes : \g<1 } + ZabcZ\=replace=>${1:+ $<1> : no } + ZabcZ\=replace=>${1:+ yes : $<1> } + ZabcZ\=replace=>${1:+ $<1 : no } + ZabcZ\=replace=>${1:+ yes : $<1 } + +/a(b)c/substitute_extended + ZabcZ\=replace=>${ + ZabcZ\=replace=>${1 + ZabcZ\=replace=>${1Z + ZabcZ\=replace=>${1; + ZabcZ\=replace=>$< + ZabcZ\=replace=>$<1 + ZabcZ\=replace=>$<1Z + ZabcZ\=replace=>$<1; + ZabcZ\=replace=>\g< + ZabcZ\=replace=>\g<1 + ZabcZ\=replace=>\g<1Z + ZabcZ\=replace=>\g<1; + "((?=(?(?=(?(?=(?(?=()))))))))" a @@ -4217,6 +4366,19 @@ /^abc/info,no_dotstar_anchor +/^abc/info,dotstar_anchor_off + +# For comparison with the following tests, which disable automatic dotstar anchoring +/.*abc/BI + +/.*abc/BI,dotstar_anchor_off + +/.*abc/BI,start_optimize_off + +/.*abc/BI,optimization_none + +/.*abc/BI,no_dotstar_anchor + /.*\d/info,auto_callout \= Expect no match aaa @@ -4589,7 +4751,7 @@ B)x/alt_verbnames,mark /abcd/null_context abcd\=null_context -\= Expect error - not allowed together +\= Expect not allowed together abcd\=null_context,find_limits abcd\=allusedtext,startchar @@ -4605,6 +4767,9 @@ B)x/alt_verbnames,mark /a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended abcDE +/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended + Hello between wORLD + /abcd/replace=xy\kz,substitute_extended abcd @@ -4661,6 +4826,42 @@ B)x/alt_verbnames,mark /abcd/g >abcd1234abcd5678<\=replace=wxyz,substitute_matched +/abc/substitute_extended,replace=>\045< + abc + +/abc/substitute_extended,replace=>\45< + abc + +/abc/substitute_extended,replace=>\o{45}< + abc + +/abc/substitute_extended,replace=>\845< 
+ abc + +/a(b)(c)/substitute_extended,replace=>\1< + abc + +/a(b)(c)/substitute_extended,replace=>\2< + abc + +/a(b)(c)/substitute_extended,replace=>\3< + abc + +/a(?b)c/substitute_extended + abc\=replace=>${namED_1}< + +/a(?b)c/substitute_extended + abc\=replace=>${namedverylongbutperfectlylegalsoyoushouldnthaveaproblem_1}< + +/abc/substitute_extended + abc\=replace=\a\b\e\f\n\r\t\v\\ + +/a(b)c/ + LabcR\=replace=>$&< + LabcR\=replace=>$`< + LabcR\=replace=>$'< + LabcR\=replace=>$_< + /^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I /((p(?'K/ @@ -5119,6 +5320,14 @@ a)"xI /(*LIMIT_HEAP=0)xxx/I +/(*LIMIT_HEAP=123/use_length + +/(*LIMIT_MATCH=/use_length + +/(*CRLF)(*LIMIT_DEPTH=/use_length + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length + /\d{0,3}(*:abc)(?C1)xxx/callout_info # ---------------------------------------------------------------------- @@ -5402,6 +5611,28 @@ a)"xI snowcat syndicate +# Confirm that the pcre2_set_optimize API does not have any undesired effect on literal patterns +/(cat)|dog/I,literal,auto_possess_off + (cat)|dog +\= Expect no match + the cat sat + +/(cat)|dog/I,literal,dotstar_anchor_off + (cat)|dog +\= Expect no match + the cat sat + +/(cat)|dog/I,literal,optimization_none + (cat)|dog +\= Expect no match + the cat sat + +# These should result in errors, since it is not permitted to use the +# PCRE2_NO_AUTO_POSSESS and PCRE2_NO_DOTSTAR_ANCHOR options on a literal pattern +/(cat)|dog/literal,no_auto_possess + +/(cat)|dog/literal,no_dotstar_anchor + /a whole line/match_line,multiline Rhubarb \na whole line\n custard \= Expect no match @@ -5574,6 +5805,54 @@ a)"xI 12abc34xyz\=substitute_skip=1 12abc34xyz\=substitute_stop=1 +/a(b)c/substitute_overflow_length,substitute_callout,replace=[1]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + abc + 
+/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[5]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[1]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + abc + /abc\rdef/ abc\ndef @@ -5865,6 +6144,17 @@ a)"xI XABCY XabcY\=replace= +/abc/replace=\U$0,substitute_extended,substitute_case_callout + XabcY +\= Expect not supported + XabcY\=null_context + +/a/substitute_extended,substitute_case_callout + XaY\=replace=\U$0 + XaY\=replace=\L$0 + XaY\=replace=\u\L$0 + XaY\=replace=\l\U$0 + # Expect non-fixed-length error "(?<=X(?(DEFINE)(.*))(?1))." @@ -6050,9 +6340,8 @@ a)"xI \x0a abc\x0adef -# This test is currently broken in the interpreter -# /|a(?0)/endanchored -# aaaa +/|a(?0)/endanchored + aaaa /A +/extended @@ -6111,4 +6400,1372 @@ a)"xI /[^\S\W]{6}/ .abc def.. +/(*MARK:a/y_)/debug + +//i,sr + +# The behaviour of these tests is different from Perl because PCRE2 doesn't +# recognize \Q or \E within a quantifier, so these examples are not treated +# as quantifiers. Subsequent processing of the string removes the escapes. 
+ +/a{\Q1\E,2}/ + xa{1,2}x +\= Expect no match + xaax + +/a{\E1,2}/ + xa{1,2}x +\= Expect no match + xaax + +# -------------- + +/(?<=|b?)./B + +/(?=|b?)./B + +/(?>|b?)./B + +/(?<=xy|a.b?|cd)/B + +# Tests for scan substring, a non Perl feature of PCRE2 + +# Parse errors first + +/(*scs:/ + +/(*scan_substring:(/ + +/(*scs:('name'/ + +/(*scs:(1)a|b)/ + +/(*scs:(0)a)/ + +/(*scan_substring:(1)a|b)/ + +/(*scs:()a|b)/ + +/(*scan_substring:()a|b)/ + +/()(*scs:(1)+a)/ + +/()(*scs:(1,1,1,1,1,1,1,1,2))/ + +/()()(*scs:(1,2,1,2,1,2,2,'XYZ'))/ + +# Tests for iterating scan_substring + +/(a)(*scs:(1)b)*c/B + +/(a)(*scs:(1)b)*?c/B + +/(a)(*scs:(1)b)*+c/B + +/(a)(*scs:(1)b)+c/B + +/(a)(*scs:(1)b)+?c/B + +/(a)(*scs:(1)b)++c/B + +/(a)(*scs:(1)b)?c/B + +/(a)(*scs:(1)b)??c/B + +/(a)(*scs:(1)b)?+c/B + +/(a)(*scs:(1)b){3}c/B + +/(a)(*scs:(1)b){3,5}?c/B + +/(a)(*scs:(1)b){3,}+c/B + +/(\w++)=(?(*scs:(1)(abc))pqr|xyz)(\w++)/ + +# Tests for scan_substring + +/([a-z]++)(*scs:(1)(stx)|(ne))(.)/B + ##string##next!## + __aastxaa:__ + __abababab:__ + +/(?[a-z]++)##(*scan_substring:('XX').*(..)$)\2/B + ##abcd##abcd##cd## + ##abcd##abcd##abcd## + +/([a-z])([a-z]++)(#+)(*scs:(2)(ab.))/ + xab## + yabc### + zababc#### + +/(?:(?[a-z]++)|(?[0-9]++)|$)(*scan_substring:('YYY')((?.).*\k$))/dupnames + $$abacd$$112345$$abca$$ + $$abcdeaf$$1234567819$$123456781$$ + +/([a-zA-Z]+)(*scs:(1).*?(?[A-Z]+)(*scan_substring:('ABC').*(.)\3))#+/ + ##abABCtuTUVXz##abCDEFGxyCDEEFGhi## + ##abAABCtuTUVXXz!!abCDEFGxyCDEFGGhi## + +/([a-zA-Z]+)(*scs:(1)(xy|ab(*ACCEPT)cd))/B + ##cdefgh##cdeabxy## + +/(?[a-zA-Z]+)(*scs:('AA')(ab(*ACCEPT)cd|xy))/B + ##cdefgh##cdeabxy## + +/([a-z]++)##(*scs:(1)(abc))?!/ + ##xyz##abc##! + ##xyz##! + ##xyz## + +/([a-z]++)##(*scs:(1)(abc))??(?(2)!|:)/ + ##abc##abc##! + ##abc##xyz##: + ##abc### + +/([a-z]++)##(*scs:(1)(abc)|xyz){8}(?(2)!|:)/ + ##abc##abc##! + ##abc##xyz##: + ##nnn##! 
+ ##nnn##: + +/[A-Z]{3}([A-Z]++)#(*scs:(1)(?<=BC)XY)#/ + ABCXY##AKCXY## + +/()(\w++)=(*scs:(2)(?=abc))(\w++)/ + xabcx=pqr. + +/(\d++)(*scs:(1)\d+\z)(\w+)/ + X123XYZ + +/(\d++)(*scs:(1)\d+\Z)(\w+)/ + X123XYZ + +/(\d++)(*scs:(1)\d+$)(\w+)/ + X123XYZ + +/([a-z]{2})[a-z](*scs:(1)(.*?))\2$/ + abcab + abcabc + +/^(([a-z]([a-z]*+))(*scs:(2).(?=(?1)|$)\3)|#){5}/ + abcdefg#hijk#! + abcdefg#hijk#lmnopqr# + +/(*scs:(1)a)(a)|x/ + a + x + +/(*scs:()a)(?a)(?b)(?c)(?d)|x/dupnames + abcd + x + +/(*scs:(1)a)?(a)/ + b + a + +/(*scs:(1)a)??(a)/ + b + a + +# Custom backtrack, goes back n - 1 characters in the input (n=8) +/x(?|(*scs:(1)(?<=(.)))|()){8}/ + abcdefghx + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE)x)).+|(.+)/ + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr)x)).+|(.+)/mark + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr))).+|(.+)/mark + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT)x)).+|(.+)/ + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr)x)).+|(.+)/mark + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr))).+|(.+)/mark + abcdef + +/(abc)(def)(*scs:(1)(*scs:(2)de(*SKIP)x)).+|(.+)/ + abcdefghi + +/(abc)(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(.+)/ + abcdefghi + +/(?<=(abc))(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(ef.+)/ + abcdefghi + +/(abc)(def)(*scs:(2)(?:(*scs:(1)abc(*SKIP:notfound)x|abcd|(abc)))).+/ + abcdefghi + +/(abc)(def)(*MARK:markstr)(*scs:(2)(?:(*scs:(1)abc(*SKIP:markstr)x))).+|(.+)/ + abcdefghi + +/^([a-z]++)(?:((?6))|((?7))|((?8))|(#))(?(DEFINE)((*scs:(1)abc(*PRUNE)d))((*scs:(1)abc(*COMMIT)e))((*scs:(1)abc(*SKIP)f)))/ + abcd# + abce# + abcf# + abc# + +/\b(\w++)(*scs:(1)^)/ + sausages and mash +\= Expect no match + !sausages and mash + +/(\b\w{3,}+\b)(*scs:(1)\W*+(?:((.)\W*+(?2)\W*+\3|)|((.)\W*+(?4)\W*+\5|\W*+.\W*+))\W*+$)/ig + ipsum lorem revel level able was I ere I saw Elba + +/(?:(?'A'a)|(?b))(*scs:('A')b)c/dupnames + abc + +# Relative reference +/(xyz)(abc)(*scs:(-1)abc)(*scs:(-2)\1)/ + >xyzabc< + 
+/^([a-z]++)#(*scs:(1)a|ab|abc|abcd|abcde|abcdef|(abcdefg))\2/ + abcdefg#abcdefg + +/^([a-z]++)(*scs:(1)(a+)(*THEN)b|(a+)(*THEN)c|(aa))/ + aaaax + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b)|(a+)(*THEN)c|(aa))/ + aaaax + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b))?/ + aaaax + +/^([a-z]++)(*scs:(1)(abc|(a+)(*THEN)b))?/ + aaaax + +/^(?:(.){20,30}#|([a-z]++)(*scs:(1)(a+)(*THEN)b){20,30}#|(.){20,30}!)/ + aaaaaaaaaaaaaaaaaaaaaaaaab! + +# List of captures + +/(?:(abc)|(?def)|ghi)(*scs:(1,'PP').(.))/B + abc + def + ghi + +/(?:(?abc)|(?def)|(ghi)|(?'NN'jkl)|mno)(*scs:('MM',3,).(.))/B,dupnames + abc + def + ghi + jkl + mno + +/f(?:(*scs:(+1,+2)(?<=(.)))|()){16}/ + 1234567890abcdef + 1ffffffffffffff + +/(?a)(*scan_substring:(1,'AA',1,)a)b/B + ab + ac + +/()()()(?<=ab(*scs:(1,2,3))cd)xyz/ + abcdxyz + +/()()()(?<=ab(*ACCEPT)(*scs:(1,2,3))cd|efg)xyz/ + abxyz + efgxyz + +# Tests for pcre2_set_optimize() + +/abc/I,optimization_none + +/abc/I,optimization_none,auto_possess + +/abc/I,optimization_none,dotstar_anchor,auto_possess + +/abc/I,optimization_none,start_optimize + +/abc/I,dotstar_anchor_off,optimization_full + +# If pcre2_set_optimize() is used to turn on some optimization, but at the same time, +# the compile options word turns it off... the compile options word "wins": + +/abc/I,no_auto_possess,auto_possess + +/abc/I,no_dotstar_anchor,dotstar_anchor + +/abc/I,no_start_optimize,start_optimize + +# -------------- + +# larger than GROUP_MAX, smaller than INT_MAX +/a\800000b/ + +# coming up on INT_MAX... 
(used to succeed with \8 being literal 8) +/a\800000000b/ + +# over INT_MAX (used to succeed with \8 being literal 8) +/a\8000000000b/ + +# -------------- + +# no_bs0 + +/a\0b\x00c\00d/ + a\x{00}b\x{00}c\x{00}d + +/a\0b/no_bs0 + +/b\x00c\00d/no_bs0 + b\x{00}c\x{00}d + +/abc/substitute_extended + abc\=replace=a\0b\x00c\00d + +/abc/substitute_extended,no_bs0 + abc\=replace=a\0b + abc\=replace=b\x00c\00d + +# python_octal + +/\0-\00-\01-\012-\0123-\123-\1234/ + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + +/\1/ + +/\12/ + \o{12} + +/abc/substitute_extended + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + abc\=replace=\1 + abc\=replace=\12 + +/\0-\00-\01-\012-\0123-\123-\1234/python_octal + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + +/\1/python_octal + +/\12/python_octal + +/abc/substitute_extended,python_octal + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + abc\=replace=\1 + abc\=replace=\12 + +# -------------- + +/a(?C)b/ + abc + abc\=callout_none + +/a(?C)b/never_callout + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +/[a[]/ + [ + +/[a[]/alt_extended_class + +/[a[B]/alt_extended_class + +/[a[B]]C/B,alt_extended_class + aC + BC +\= Expect no match + [C + +/[[A][B]]/B,alt_extended_class + A + B +\= Expect no match + [ + ] + +/[[A]||[B]]/B,alt_extended_class + A + B +\= Expect no match + C + +/[[^A][B]]/B,alt_extended_class + B + C +\= Expect no match + A + +/[^[A][B]]/B,alt_extended_class + C +\= Expect no match + A + B + +/[^[A]&&[B]]/B,alt_extended_class + A + B + C + +/[[AC]||[BC]]/B,alt_extended_class + A + B + C +\= Expect no match + D + +/[[AC]&&[BC]]/B,alt_extended_class + C +\= Expect no match + A + B + D + +/[[AC]--[BC]]/B,alt_extended_class + A +\= Expect no match + B + C + D + +/[[AC]~~[BC]]/B,alt_extended_class + A + B +\= Expect no match + C + D + +/[A[]]]/B,alt_extended_class + A + ] +\= Expect no match + [ + +/[A[^]]]/B,alt_extended_class + A + [ + C +\= Expect no match + ] + +/[A[]]/B,alt_extended_class,allow_empty_class + 
A +\= Expect no match + ] + [ + +/[A[^]]/B,alt_extended_class,allow_empty_class + A + C + [ + ] + +/[A-C--B]/B,alt_extended_class + A + C +\= Expect no match + B + +/[^A-C--B]/B,alt_extended_class + B +\= Expect no match + A + C + +/[[\d\D]--b]/B,alt_extended_class + a + c +\= Expect no match + b + +/[\dAC-E[:space:]&&[^z]]/B,alt_extended_class + 0 + A + C + D + E + \t +\= Expect no match + B + F + ; + +/[z||[^\dAC-E[:space:]]]/B,alt_extended_class + z + B + F + ; +\= Expect no match + 0 + A + C + D + E + \t + +/[ab||cd]/B,alt_extended_class + a + c +\= Expect no match + e + +/[[a]b||[c]d]/B,alt_extended_class + a + c +\= Expect no match + e + +/[a[b]||c[d]]/B,alt_extended_class + a + c +\= Expect no match + e + +/[-&&-]/B,alt_extended_class + - +\= Expect no match + a + +/[a-&&-a]/B,alt_extended_class + - + a +\= Expect no match + b + +/[-a&&a-]/B,alt_extended_class + - + a +\= Expect no match + b + +/[[a]-&&-[a]]/B,alt_extended_class + - + a +\= Expect no match + b + +/[-[a]&&[a]-]/B,alt_extended_class + - + a +\= Expect no match + b + +/(?xx:[ ^ a[ ^ b] ])/B,alt_extended_class + b +\= Expect no match + A + a + c + +/[ ^ a[ ^ b] ]/B,alt_extended_class + \x20 + ^ + a + b +\= Expect no match + c + +/[a-c--b]+/B,alt_extended_class + ac + a +\= Expect no match + b + +/[a-c--b]{2,3}/B,alt_extended_class + ac + cac +\= Expect no match + a + bb + +/x[a-c--b]+y/B,alt_extended_class + xacy + xaay + xay +\= Expect no match + zacy + xacz + xy + xby + +/[A--B--C--D]/B,alt_extended_class + A +\= Expect no match + B + +/[A--A--A]/B,alt_extended_class +\= Expect no match + A + B + +/[[A--A]--A]/B,alt_extended_class +\= Expect no match + A + B + +/[A--[A--A]]/B,alt_extended_class + A +\= Expect no match + B + +/[A--^B]/B,alt_extended_class + A +\= Expect no match + B + ^ + z + +/([a-z--n])\1/B,alt_extended_class + aa + zz +\= Expect no match + az + nn + +/(x[a-z--n]y)\1/B,alt_extended_class + xayxay + xzyxzy +\= Expect no match + xnyxny + 
+/(?:_\1|([a-z--n])){2}/B,alt_extended_class + a_a + z_z +\= Expect no match + a_z + n_n + +/(?:_\1|([a-z--n]))+/B,alt_extended_class + a_a + z_z + a_partial +\= Expect no match + n_n + +/[\d-[z]]/B,alt_extended_class + 1 + - + z + +/[\d-||z]/B,alt_extended_class + 1 + - + z + +/[z[\d-]]/B,alt_extended_class + 1 + - + z + +/[1-[z]]/B,alt_extended_class + 1 + - + z + +/[1-||z]/B,alt_extended_class + 1 + - + z + +/[z[1-]]/B,alt_extended_class + 1 + - + z + +/[a--/alt_extended_class + +/[a--a/alt_extended_class + +/[a--[a/alt_extended_class + +/[a--[a]/alt_extended_class + +/[a--[a]--/alt_extended_class + +/[a--]/alt_extended_class + +/[--a]/alt_extended_class + +/[^--a]/alt_extended_class + +/[--]/alt_extended_class + +/[a---b]/alt_extended_class + +/[a----b]/alt_extended_class + +/[a&&&b]/alt_extended_class + +/[a|||b]/alt_extended_class + +/[a~~~b]/alt_extended_class + +/[a~~~~b]/alt_extended_class + +/[a~~/alt_extended_class + +/[a~~~/alt_extended_class + +/[a~~~~/alt_extended_class + +/[a||b&&c]/alt_extended_class + +/[a||b~~c]/alt_extended_class + +/[a~~b&&c]/alt_extended_class + +/[a--b~~c]/alt_extended_class + +/[a--b&&c]/alt_extended_class + +/[a||b--c]/alt_extended_class + +/[a||[b--c]]/alt_extended_class + a + b +\= Expect no match + c + +/[\d-z]/B,alt_extended_class + +/[z-\d]/B,alt_extended_class + +/[abc -- b]+/B,alt_extended_class + acacbac + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a]]]]]]]]]]]]]]]/alt_extended_class + a +\= Expect no match + b + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[z]&&a]]]]]]]]]]]]]]]/alt_extended_class + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a[z]]]]]]]]]]]]]]]]/alt_extended_class + +/[z&/alt_extended_class + +/[[^]~~[^]]/B,alt_extended_class,allow_empty_class +\= Expect no match + a + 
+/[^[[^]~~[^]]]/B,alt_extended_class,allow_empty_class + a + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +# allow-empty-class does nothing inside (?[...]) +/(?[ []] ])/B,allow_empty_class + ] + +# bad-escape-is-literal does nothing inside (?[...]) +/[ \j ]/ + +/[ /\ + +/(?[ \j ])/ + +/(?[ /\ + +/[ \j ]/bad_escape_is_literal + j +\= Expect no match + k + +/[ /\bad_escape_is_literal + +/(?[ \j ])/bad_escape_is_literal + +/(?[ /\bad_escape_is_literal + +/(?[ [\j] ])/bad_escape_is_literal + +/(?[ (\j) ])/bad_escape_is_literal + +# We can't test error cases in testinput1 + +/(?[])/ + +/(?[/ + +/(?[]/ + +/(?[\n/ + +/(?[\n]/ + +/(?[\n]z)/ + +/(?[\n] )/ + +/(?[(/ + +/(?[( / + +/(?[(\n/ + +/(?[ \n + () ])/ + +/(?[1])/ + +/(?[a])/ + +/(?[a-c])/ + +/(?[(])/ + +/(?[(\n])/ + +/(?[\n)])/ + +/(?[^\n])/ + +/(?[ \n \t ])/ + +/(?[ \d \t ])/ + +/(?[ [\n] \t ])/ + +/(?[ (\n) \t ])/ + +/(?[ [:alpha:] \t ])/ + +/(?[ \n + \t \d ])/ + +/(?[ !\n \t ])/ + +/(?[ \n [:alpha:] ])/ + +/(?[ \n [\d] ])/ + +/(?[ \n (\t) ])/ + +/(?[ \n !\t ])/ + +/(?[ \n \t ])/ + +/(?[:graph:])/ + +/(?[\Qn\E])/ + +# maximum depth tests + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&\n))))))))))))))])/ + \n +\= Expect no match + a + b + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+([\n]&\n))))))))))))))])/ + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&[\n]))))))))))))))])/ + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+((\n)&\n))))))))))))))])/ + +# -------------- + +/[[:digit:] -Z]/xx + +/[\d -Z]/xx + +/[[:digit:]\E-H]/ + +/[[:digit:]\Q\E-H]+/ + 
+/[z-[:space:]]/ + +/[z-\d]/ + +/[[:space:]-z]/ + +/[\d-z]/ + +/[\d-\w]/ + +/[\Q/ + +/[\Q/\ + +/[\Q\E/ + +/[\Q\n/ + +/[\Q\n]/ + +/[\Q\n/\ + +/[\Q\n\]/ + +/[\Q\n\E/ + +/[\Q\n\E]/ + \\ + n +\= Expect no match + \n + Q + +/[z\Q/ + +/[z\Q/\ + +/[z\Q\E/ + +/[/\ + +/[\n/ + +/[\E/ + +/[\^z]/B + +/[ \^]/B + +/[\\z]/B + +/[0-z]/B + +/[0\-z]/B + +/[]z]/B + +/[ \]]/B + +/[ --]/B + +/[A-\]]/B + +/[A-\\]/B + +/[\A]/ + +/[\Z]/ + +/[\z]/ + +/[\G]/ + +/[\K]/ + +/[\g<1>]/ + < + g +\= Expect no match + \\ + +/[\k<1>]/ + < + k +\= Expect no match + \\ + +/[\u{ 1z}]/alt_bsux,extra_alt_bsux + u + { + } + \x20 + 1 +\= Expect no match + \\ + +/[a\x{e1}]/iB + a + A + \x{e1} + +# -------------- + +# Attempt at full coverage of the substitution buffer-management code - not +# just covering each line in each macro, but covering each instantiation of each +# line in those macros. + +# +# CHECKMEMCPY tests +# +# Four conditions for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Additionally some CHECKMEMCPYs have a substitute_replacement_only branch. 
+# + +# pre-start-offset fragment +# no "overflow after previous overflow" condition +/a/ + XYaZ\=offset=2,replace=foo + XYaZ\=offset=2,replace=[1]foo + XYaZ\=offset=2,substitute_overflow_length,replace=[1]foo + XYaZ\=offset=2,substitute_replacement_only,replace=foo + +# pre-match fragment +/a/ + XYaZ\=replace=foo + XYaZ\=replace=[1]foo + XYaZ\=substitute_overflow_length,replace=[1]foo + XXYaZ\=offset=2,substitute_overflow_length,replace=[1]foo + XYaZ\=substitute_replacement_only,replace=foo + +# empty match bumpalong +/(?<=abc)(|DEF)/g + abcDEFabcZ\=replace=+ + abcDEFabcZ\=replace=[5]+ + abcDEFabcZ\=substitute_overflow_length,replace=[5]+ + abcDEFabcZ\=replace=[9]+ + abcDEFabcZ\=substitute_overflow_length,replace=[9]+ + abcDEFabcZ\=substitute_overflow_length,replace=[1]+ + abcDEFabcZ\=substitute_replacement_only,replace=+ + +# literal replacement +/a/ + XYaZ\=substitute_literal,replace=$0 + XYaZ\=substitute_literal,replace=[3]$0 + XYaZ\=substitute_literal,substitute_overflow_length,replace=[3]$0 + XYaZ\=substitute_literal,substitute_overflow_length,replace=[1]$0 + +# a MARK +/(*:pear)apple/ + XappleY\=replace=${*MARK} + XappleY\=replace=[3]${*MARK} + XappleY\=substitute_overflow_length,replace=[3]${*MARK} + XXappleY\=substitute_overflow_length,replace=[1]${*MARK} + +# a subject fragment +/a(bb)c/ + XabbcY\=replace=$1 + XabbcY\=replace=[2]$1 + XabbcY\=substitute_overflow_length,replace=[2]$1 + XXabbcY\=substitute_overflow_length,replace=[1]$1 + +# a zero-length subject fragment +/a()c/ + XacY\=replace=$1 + XacY\=replace=[2]$1 + XacY\=substitute_overflow_length,replace=[2]$1 + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\x{48} + XabcY\=replace=[1]\x{48} + XabcY\=substitute_overflow_length,replace=[1]\x{48} + XXabcY\=substitute_overflow_length,replace=[1]\x{48} + +# a replacement literal character +/abc/ + XabcY\=replace=Z + XabcY\=replace=[1]Z + XabcY\=substitute_overflow_length,replace=[1]Z + 
XXabcY\=substitute_overflow_length,replace=[1]Z + +# a cancelled substitution +# no "overflow after previous overflow" condition +/abc/substitute_skip=1 + XabcY\=replace=Z + XabcY\=replace=[3]Z + XabcY\=substitute_overflow_length,replace=[3]Z + XabcY\=substitute_replacement_only,replace=Z + +# the rest of the subject +/abc/ + XabcYY\=replace=Z + XabcYY\=replace=[3]Z + XabcYY\=substitute_overflow_length,replace=[3]Z + XabcYY\=substitute_overflow_length,replace=[1]Z + XabcYY\=substitute_replacement_only,replace=Z + +# the trailing NULL +/abc/ + XabcY\=replace=Z + XabcY\=replace=[3]Z + XabcY\=substitute_overflow_length,replace=[3]Z + XabcY\=substitute_overflow_length,replace=[1]Z + +# +# CHECKCASECPY tests +# +# The same four conditions for CHECKCASECPY as for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Also the condition where CHECKCASECPY isn't called due to a custom callout +# + +# a MARK +/(*:pear)apple/substitute_extended + XappleY\=replace=\U${*MARK} + XappleY\=replace=[3]\U${*MARK} + XappleY\=substitute_overflow_length,replace=[3]\U${*MARK} + XXappleY\=substitute_overflow_length,replace=[1]\U${*MARK} + XappleY\=substitute_case_callout,replace=\U${*MARK} + +# a subject fragment +/a(bb)c/substitute_extended + XabbcY\=replace=\U$1 + XabbcY\=replace=[2]\U$1 + XabbcY\=substitute_overflow_length,replace=[2]\U$1 + XXabbcY\=substitute_overflow_length,replace=[1]\U$1 + XabbcY\=substitute_case_callout,replace=\U$1 + +# a zero-length subject fragment +/a()c/substitute_extended + XacY\=replace=\U$1 + XacY\=replace=[2]\U$1 + XacY\=substitute_overflow_length,replace=[2]\U$1 + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\U\x{48} + XabcY\=replace=[1]\U\x{48} + XabcY\=substitute_overflow_length,replace=[1]\U\x{48} + XXabcY\=substitute_overflow_length,replace=[1]\U\x{48} + XabcY\=substitute_case_callout,replace=\U\x{48} + +# a replacement literal character 
+/abc/substitute_extended + XabcY\=replace=\UZ + XabcY\=replace=[1]\UZ + XabcY\=substitute_overflow_length,replace=[1]\UZ + XXabcY\=substitute_overflow_length,replace=[1]\UZ + XabcY\=substitute_case_callout,replace=\UZ + +# +# DELAYEDFORCECASE tests +# +# Some different triggering conditions for DELAYEDFORCECASE: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# if there was a previous overflow, then the case callout can't be invoked +# Also, the CASEERROR branch. +# Also, the branch for where chars_outstanding is zero, both with and without +# a previous overflow. +# + +# on set casing mode +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=\Uf\Lq + XabcY\=replace=[2]\Uf\Lq + XabcY\=substitute_overflow_length,replace=[2]\Uf\Lq + XabcY\=substitute_overflow_length,replace=[1]\Uf\Lq + XabcY\=replace=\U!\Lq + XabcY\=replace=\U\Lq + XXabcY\=substitute_overflow_length,replace=[1]\U\Lq + +# trailing fragment +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=f + XabcY\=replace=\Uf + XabcY\=replace=[2]\Uf + XabcY\=substitute_overflow_length,replace=[2]\Uf + XabcY\=substitute_overflow_length,replace=[1]\Uf + XabcY\=replace=\U! 
+ XabcY\=replace=\U + XXabcY\=substitute_overflow_length,replace=[1]\U + +# +# do_case_copy tests +# + +/aa/i,substitute_extended + XaaY\=replace=\Uaa\uaa\LAA\lAA\U\lAA\L\uaa\u\LaaA\l\UAAa + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lAA + XaaY\=replace=[2]\lAA + XaaY\=replace=[3]\lAA + XaaY\=replace=[4]\lAA + XaaY\=replace=[5]\lAA + XAAY\=replace=[1]\l$0 + XAAY\=replace=[2]\l$0 + XAAY\=replace=[3]\l$0 + XAAY\=replace=[4]\l$0 + XAAY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UAa + XaaY\=replace=[2]\l\UAa + XaaY\=replace=[3]\l\UAa + XaaY\=replace=[4]\l\UAa + XaaY\=replace=[5]\l\UAa + XAaY\=replace=[1]\l\U$0 + XAaY\=replace=[2]\l\U$0 + XAaY\=replace=[3]\l\U$0 + XAaY\=replace=[4]\l\U$0 + XAaY\=replace=[5]\l\U$0 + XaaY\=replace=[1]\u\LaA + XaaY\=replace=[2]\u\LaA + XaaY\=replace=[3]\u\LaA + XaaY\=replace=[4]\u\LaA + XaaY\=replace=[5]\u\LaA + XaAY\=replace=[1]\u\L$0 + XaAY\=replace=[2]\u\L$0 + XaAY\=replace=[3]\u\L$0 + XaAY\=replace=[4]\u\L$0 + XaAY\=replace=[5]\u\L$0 + +/aa/i,substitute_extended,substitute_overflow_length + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lAA + XaaY\=replace=[2]\lAA + XaaY\=replace=[3]\lAA + XaaY\=replace=[4]\lAA + XaaY\=replace=[5]\lAA + XAAY\=replace=[1]\l$0 + XAAY\=replace=[2]\l$0 + XAAY\=replace=[3]\l$0 + XAAY\=replace=[4]\l$0 + XAAY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UAa + XaaY\=replace=[2]\l\UAa + XaaY\=replace=[3]\l\UAa + XaaY\=replace=[4]\l\UAa + XaaY\=replace=[5]\l\UAa + XAaY\=replace=[1]\l\U$0 + XAaY\=replace=[2]\l\U$0 + XAaY\=replace=[3]\l\U$0 + XAaY\=replace=[4]\l\U$0 + XAaY\=replace=[5]\l\U$0 + 
XaaY\=replace=[1]\u\LaA + XaaY\=replace=[2]\u\LaA + XaaY\=replace=[3]\u\LaA + XaaY\=replace=[4]\u\LaA + XaaY\=replace=[5]\u\LaA + XaAY\=replace=[1]\u\L$0 + XaAY\=replace=[2]\u\L$0 + XaAY\=replace=[3]\u\L$0 + XaAY\=replace=[4]\u\L$0 + XaAY\=replace=[5]\u\L$0 + +/aa/i,substitute_extended,substitute_case_callout + XaaY\=replace=\Uaa\uaa\LBB\lBB\U\lBB\L\uaa\u\LaaB\l\UBBa + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lBB + XaaY\=replace=[2]\lBB + XaaY\=replace=[3]\lBB + XaaY\=replace=[4]\lBB + XaaY\=replace=[5]\lBB + XBBY\=replace=[1]\l$0 + XBBY\=replace=[2]\l$0 + XBBY\=replace=[3]\l$0 + XBBY\=replace=[4]\l$0 + XBBY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UBa + XaaY\=replace=[2]\l\UBa + XaaY\=replace=[3]\l\UBa + XaaY\=replace=[4]\l\UBa + XaaY\=replace=[5]\l\UBa + XBaY\=replace=[1]\l\U$0 + XBaY\=replace=[2]\l\U$0 + XBaY\=replace=[3]\l\U$0 + XBaY\=replace=[4]\l\U$0 + XBaY\=replace=[5]\l\U$0 + XaaY\=replace=[1]\u\LaB + XaaY\=replace=[2]\u\LaB + XaaY\=replace=[3]\u\LaB + XaaY\=replace=[4]\u\LaB + XaaY\=replace=[5]\u\LaB + XaBY\=replace=[1]\u\L$0 + XaBY\=replace=[2]\u\L$0 + XaBY\=replace=[3]\u\L$0 + XaBY\=replace=[4]\u\L$0 + XaBY\=replace=[5]\u\L$0 + +/aa/i,substitute_extended,substitute_case_callout,substitute_overflow_length + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lBB + XaaY\=replace=[2]\lBB + XaaY\=replace=[3]\lBB + XaaY\=replace=[4]\lBB + XaaY\=replace=[5]\lBB + XBBY\=replace=[1]\l$0 + XBBY\=replace=[2]\l$0 + XBBY\=replace=[3]\l$0 + XBBY\=replace=[4]\l$0 + XBBY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UBa + XaaY\=replace=[2]\l\UBa + 
XaaY\=replace=[3]\l\UBa + XaaY\=replace=[4]\l\UBa + XaaY\=replace=[5]\l\UBa + XBaY\=replace=[1]\l\U$0 + XBaY\=replace=[2]\l\U$0 + XBaY\=replace=[3]\l\U$0 + XBaY\=replace=[4]\l\U$0 + XBaY\=replace=[5]\l\U$0 + XaaY\=replace=[1]\u\LaB + XaaY\=replace=[2]\u\LaB + XaaY\=replace=[3]\u\LaB + XaaY\=replace=[4]\u\LaB + XaaY\=replace=[5]\u\LaB + XaBY\=replace=[1]\u\L$0 + XaBY\=replace=[2]\u\L$0 + XaBY\=replace=[3]\u\L$0 + XaBY\=replace=[4]\u\L$0 + XaBY\=replace=[5]\u\L$0 + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l\U!a + XaaY\=replace=\l\Ua! + XaaY\=replace=\ufa + XaaY\=replace=[3]\ufa + XaaY\=replace=\l\Uaoo + XaaY\=replace=[4]\l\Uaoo + XaaY\=replace=\l\UPa + XaaY\=replace=[3]\l\UPa + XaaY\=replace=[4]\l\UPa + XaaY\=replace=\l\UPo + XaaY\=replace=[3]\l\UPo + XaaY\=replace=[4]\l\UPo + XaaY\=replace=\l\UPpp + XaaY\=replace=[4]\l\UPpp + XaaY\=replace=[5]\l\UPpp + +# +# special test-callback case transformation tests +# + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l! 
+ XaaY\=replace=\ua\lB + XaaY\=replace=\LdDZ\UdDZ\ud\uD\uZ + XaaY\=replace=\uf\Uf\Lf\Us\Ls\uS\lS + XaaY\=replace=\LOO\LOQ\UOO\uo\lo + XaaY\=replace=\upq\upp\lpp\Upp\Lpp\lP\uP + XaaY\=replace=\ll\ul\Ul\LMmNn\UMmNn + XaaY\=replace=\Uac\Uaca\Uak\Uaka\Lck\LBK\LBKB\LBK \UK + Xaay\=replace=\u\Lqj\u\Lij\u\LIj\u\LiJ\u\LIJ\u\Liq\u\Lij\Uij\UiIjJ\LiIjJ + Xaay\=replace=\Uaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +# -------------- + # End of testinput2 diff --git a/testdata/testinput21 b/testdata/testinput21 index 1d1fbed..5904af3 100644 --- a/testdata/testinput21 +++ b/testdata/testinput21 @@ -13,4 +13,6 @@ /(?<=ab\Cde)X/ abZdeX +/[\C]/ + # End of testinput21 diff --git a/testdata/testinput23 b/testdata/testinput23 index d0a9bc4..8a1f4cc 100644 --- a/testdata/testinput23 +++ b/testdata/testinput23 @@ -4,4 +4,6 @@ /a\Cb/ +/a[\C]b/ + # End of testinput23 diff --git a/testdata/testinput26 b/testdata/testinput26 index 94b3abc..0d262ee 100644 --- a/testdata/testinput26 +++ b/testdata/testinput26 @@ -1,2750 +1,2754 @@ -# These tests are generated by maint/GenerateTest26.py, do not edit. +# These tests were generated by maint/GenerateTest.py using PCRE2's UCP +# data, do not edit unless that data has changed and they are reflecting +# a previous version. -# Unicode Script Extension tests. 
+# Unicode Script Extension tests for version 15.0.0 + +#perltest # Base script check /^\p{sc=Latin}/utf - A + A /^\p{Script=Latn}/utf - \x{1df2a} + \x{1df2a} # Script extension check /^\p{Latin}/utf - \x{363} + \x{363} /^\p{scx=Latn}/utf - \x{a92e} + \x{a92e} # Script extension only character /^\p{Latin}/utf - \x{363} + \x{363} /^\p{sc=Latin}/utf - \x{363} + \x{363} # Character not in script /^\p{Latin}/utf - \x{1df2b} + \x{1df2b} # Base script check /^\p{sc=Greek}/utf - \x{370} + \x{370} /^\p{Script=Grek}/utf - \x{1d245} + \x{1d245} # Script extension check /^\p{Greek}/utf - \x{342} + \x{342} /^\p{Script_Extensions=Grek}/utf - \x{1dc1} + \x{1dc1} # Script extension only character /^\p{Greek}/utf - \x{342} + \x{342} /^\p{sc=Greek}/utf - \x{342} + \x{342} # Character not in script /^\p{Greek}/utf - \x{1d246} + \x{1d246} # Base script check /^\p{sc=Cyrillic}/utf - \x{400} + \x{400} /^\p{Script=Cyrl}/utf - \x{1e08f} + \x{1e08f} # Script extension check /^\p{Cyrillic}/utf - \x{483} + \x{483} /^\p{scx=Cyrl}/utf - \x{a66f} + \x{a66f} # Script extension only character /^\p{Cyrillic}/utf - \x{2e43} + \x{2e43} /^\p{sc=Cyrillic}/utf - \x{2e43} + \x{2e43} # Character not in script /^\p{Cyrillic}/utf - \x{1e090} + \x{1e090} # Base script check /^\p{sc=Arabic}/utf - \x{600} + \x{600} /^\p{Script=Arab}/utf - \x{1eef1} + \x{1eef1} # Script extension check /^\p{Arabic}/utf - \x{60c} + \x{60c} /^\p{Script_Extensions=Arab}/utf - \x{102fb} + \x{102fb} # Script extension only character /^\p{Arabic}/utf - \x{102e0} + \x{102e0} /^\p{sc=Arabic}/utf - \x{102e0} + \x{102e0} # Character not in script /^\p{Arabic}/utf - \x{1eef2} + \x{1eef2} # Base script check /^\p{sc=Syriac}/utf - \x{700} + \x{700} /^\p{Script=Syrc}/utf - \x{86a} + \x{86a} # Script extension check /^\p{Syriac}/utf - \x{60c} + \x{60c} /^\p{scx=Syrc}/utf - \x{1dfa} + \x{1dfa} # Script extension only character /^\p{Syriac}/utf - \x{1dfa} + \x{1dfa} /^\p{sc=Syriac}/utf - \x{1dfa} + \x{1dfa} # Character not in script 
/^\p{Syriac}/utf - \x{1dfb} + \x{1dfb} # Base script check /^\p{sc=Thaana}/utf - \x{780} + \x{780} /^\p{Script=Thaa}/utf - \x{7b1} + \x{7b1} # Script extension check /^\p{Thaana}/utf - \x{60c} + \x{60c} /^\p{Script_Extensions=Thaa}/utf - \x{fdfd} + \x{fdfd} # Script extension only character /^\p{Thaana}/utf - \x{fdf2} + \x{fdf2} /^\p{sc=Thaana}/utf - \x{fdf2} + \x{fdf2} # Character not in script /^\p{Thaana}/utf - \x{fdfe} + \x{fdfe} # Base script check /^\p{sc=Devanagari}/utf - \x{900} + \x{900} /^\p{Script=Deva}/utf - \x{11b09} + \x{11b09} # Script extension check /^\p{Devanagari}/utf - \x{951} + \x{951} /^\p{scx=Deva}/utf - \x{a8f3} + \x{a8f3} # Script extension only character /^\p{Devanagari}/utf - \x{1cd1} + \x{1cd1} /^\p{sc=Devanagari}/utf - \x{1cd1} + \x{1cd1} # Character not in script /^\p{Devanagari}/utf - \x{11b0a} + \x{11b0a} # Base script check /^\p{sc=Bengali}/utf - \x{980} + \x{980} /^\p{Script=Beng}/utf - \x{9fe} + \x{9fe} # Script extension check /^\p{Bengali}/utf - \x{951} + \x{951} /^\p{Script_Extensions=Beng}/utf - \x{a8f1} + \x{a8f1} # Script extension only character /^\p{Bengali}/utf - \x{1cf7} + \x{1cf7} /^\p{sc=Bengali}/utf - \x{1cf7} + \x{1cf7} # Character not in script /^\p{Bengali}/utf - \x{a8f2} + \x{a8f2} # Base script check /^\p{sc=Gurmukhi}/utf - \x{a01} + \x{a01} /^\p{Script=Guru}/utf - \x{a76} + \x{a76} # Script extension check /^\p{Gurmukhi}/utf - \x{951} + \x{951} /^\p{scx=Guru}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Gurmukhi}/utf - \x{a836} + \x{a836} /^\p{sc=Gurmukhi}/utf - \x{a836} + \x{a836} # Character not in script /^\p{Gurmukhi}/utf - \x{a83a} + \x{a83a} # Base script check /^\p{sc=Gujarati}/utf - \x{a81} + \x{a81} /^\p{Script=Gujr}/utf - \x{aff} + \x{aff} # Script extension check /^\p{Gujarati}/utf - \x{951} + \x{951} /^\p{Script_Extensions=Gujr}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Gujarati}/utf - \x{a836} + \x{a836} /^\p{sc=Gujarati}/utf - \x{a836} + \x{a836} # 
Character not in script /^\p{Gujarati}/utf - \x{a83a} + \x{a83a} # Base script check /^\p{sc=Oriya}/utf - \x{b01} + \x{b01} /^\p{Script=Orya}/utf - \x{b77} + \x{b77} # Script extension check /^\p{Oriya}/utf - \x{951} + \x{951} /^\p{scx=Orya}/utf - \x{1cf2} + \x{1cf2} # Script extension only character /^\p{Oriya}/utf - \x{1cda} + \x{1cda} /^\p{sc=Oriya}/utf - \x{1cda} + \x{1cda} # Character not in script /^\p{Oriya}/utf - \x{1cf3} + \x{1cf3} # Base script check /^\p{sc=Tamil}/utf - \x{b82} + \x{b82} /^\p{Script=Taml}/utf - \x{11fff} + \x{11fff} # Script extension check /^\p{Tamil}/utf - \x{951} + \x{951} /^\p{Script_Extensions=Taml}/utf - \x{11fd3} + \x{11fd3} # Script extension only character /^\p{Tamil}/utf - \x{a8f3} + \x{a8f3} /^\p{sc=Tamil}/utf - \x{a8f3} + \x{a8f3} # Character not in script /^\p{Tamil}/utf - \x{12000} + \x{12000} # Base script check /^\p{sc=Telugu}/utf - \x{c00} + \x{c00} /^\p{Script=Telu}/utf - \x{c7f} + \x{c7f} # Script extension check /^\p{Telugu}/utf - \x{951} + \x{951} /^\p{scx=Telu}/utf - \x{1cf2} + \x{1cf2} # Script extension only character /^\p{Telugu}/utf - \x{1cda} + \x{1cda} /^\p{sc=Telugu}/utf - \x{1cda} + \x{1cda} # Character not in script /^\p{Telugu}/utf - \x{1cf3} + \x{1cf3} # Base script check /^\p{sc=Kannada}/utf - \x{c80} + \x{c80} /^\p{Script=Knda}/utf - \x{cf3} + \x{cf3} # Script extension check /^\p{Kannada}/utf - \x{951} + \x{951} /^\p{Script_Extensions=Knda}/utf - \x{a835} + \x{a835} # Script extension only character /^\p{Kannada}/utf - \x{1cf4} + \x{1cf4} /^\p{sc=Kannada}/utf - \x{1cf4} + \x{1cf4} # Character not in script /^\p{Kannada}/utf - \x{a836} + \x{a836} # Base script check /^\p{sc=Malayalam}/utf - \x{d00} + \x{d00} /^\p{Script=Mlym}/utf - \x{d7f} + \x{d7f} # Script extension check /^\p{Malayalam}/utf - \x{951} + \x{951} /^\p{scx=Mlym}/utf - \x{a832} + \x{a832} # Script extension only character /^\p{Malayalam}/utf - \x{1cda} + \x{1cda} /^\p{sc=Malayalam}/utf - \x{1cda} + \x{1cda} # Character not in script 
/^\p{Malayalam}/utf - \x{a833} + \x{a833} # Base script check /^\p{sc=Sinhala}/utf - \x{d81} + \x{d81} /^\p{Script=Sinh}/utf - \x{111f4} + \x{111f4} # Script extension check /^\p{Sinhala}/utf - \x{964} + \x{964} /^\p{Script_Extensions=Sinh}/utf - \x{965} + \x{965} # Script extension only character /^\p{Sinhala}/utf - \x{964} + \x{964} /^\p{sc=Sinhala}/utf - \x{964} + \x{964} # Character not in script /^\p{Sinhala}/utf - \x{111f5} + \x{111f5} # Base script check /^\p{sc=Myanmar}/utf - \x{1000} + \x{1000} /^\p{Script=Mymr}/utf - \x{aa7f} + \x{aa7f} # Script extension check /^\p{Myanmar}/utf - \x{1040} + \x{1040} /^\p{scx=Mymr}/utf - \x{a92e} + \x{a92e} # Script extension only character /^\p{Myanmar}/utf - \x{a92e} + \x{a92e} /^\p{sc=Myanmar}/utf - \x{a92e} + \x{a92e} # Character not in script /^\p{Myanmar}/utf - \x{aa80} + \x{aa80} # Base script check /^\p{sc=Georgian}/utf - \x{10a0} + \x{10a0} /^\p{Script=Geor}/utf - \x{2d2d} + \x{2d2d} # Script extension check /^\p{Georgian}/utf - \x{10fb} + \x{10fb} /^\p{Script_Extensions=Geor}/utf - \x{10fb} + \x{10fb} # Script extension only character /^\p{Georgian}/utf - \x{10fb} + \x{10fb} /^\p{sc=Georgian}/utf - \x{10fb} + \x{10fb} # Character not in script /^\p{Georgian}/utf - \x{2d2e} + \x{2d2e} # Base script check /^\p{sc=Hangul}/utf - \x{1100} + \x{1100} /^\p{Script=Hang}/utf - \x{ffdc} + \x{ffdc} # Script extension check /^\p{Hangul}/utf - \x{3001} + \x{3001} /^\p{scx=Hang}/utf - \x{ff65} + \x{ff65} # Script extension only character /^\p{Hangul}/utf - \x{3003} + \x{3003} /^\p{sc=Hangul}/utf - \x{3003} + \x{3003} # Character not in script /^\p{Hangul}/utf - \x{ffdd} + \x{ffdd} # Base script check /^\p{sc=Mongolian}/utf - \x{1800} + \x{1800} /^\p{Script=Mong}/utf - \x{1166c} + \x{1166c} # Script extension check /^\p{Mongolian}/utf - \x{1802} + \x{1802} /^\p{Script_Extensions=Mong}/utf - \x{202f} + \x{202f} # Script extension only character /^\p{Mongolian}/utf - \x{202f} + \x{202f} /^\p{sc=Mongolian}/utf - \x{202f} + 
\x{202f} # Character not in script /^\p{Mongolian}/utf - \x{1166d} + \x{1166d} # Base script check /^\p{sc=Hiragana}/utf - \x{3041} + \x{3041} /^\p{Script=Hira}/utf - \x{1f200} + \x{1f200} # Script extension check /^\p{Hiragana}/utf - \x{3001} + \x{3001} /^\p{scx=Hira}/utf - \x{ff9f} + \x{ff9f} # Script extension only character /^\p{Hiragana}/utf - \x{3031} + \x{3031} /^\p{sc=Hiragana}/utf - \x{3031} + \x{3031} # Character not in script /^\p{Hiragana}/utf - \x{1f201} + \x{1f201} # Base script check /^\p{sc=Katakana}/utf - \x{30a1} + \x{30a1} /^\p{Script=Kana}/utf - \x{1b167} + \x{1b167} # Script extension check /^\p{Katakana}/utf - \x{3001} + \x{3001} /^\p{Script_Extensions=Kana}/utf - \x{ff9f} + \x{ff9f} # Script extension only character /^\p{Katakana}/utf - \x{3031} + \x{3031} /^\p{sc=Katakana}/utf - \x{3031} + \x{3031} # Character not in script /^\p{Katakana}/utf - \x{1b168} + \x{1b168} # Base script check /^\p{sc=Bopomofo}/utf - \x{2ea} + \x{2ea} /^\p{Script=Bopo}/utf - \x{31bf} + \x{31bf} # Script extension check /^\p{Bopomofo}/utf - \x{3001} + \x{3001} /^\p{scx=Bopo}/utf - \x{ff65} + \x{ff65} # Script extension only character /^\p{Bopomofo}/utf - \x{302a} + \x{302a} /^\p{sc=Bopomofo}/utf - \x{302a} + \x{302a} # Character not in script /^\p{Bopomofo}/utf - \x{ff66} + \x{ff66} # Base script check /^\p{sc=Han}/utf - \x{2e80} + \x{2e80} /^\p{Script=Hani}/utf - \x{323af} + \x{323af} # Script extension check /^\p{Han}/utf - \x{3001} + \x{3001} /^\p{Script_Extensions=Hani}/utf - \x{1f251} + \x{1f251} # Script extension only character /^\p{Han}/utf - \x{3006} + \x{3006} /^\p{sc=Han}/utf - \x{3006} + \x{3006} # Character not in script /^\p{Han}/utf - \x{323b0} + \x{323b0} # Base script check /^\p{sc=Yi}/utf - \x{a000} + \x{a000} /^\p{Script=Yiii}/utf - \x{a4c6} + \x{a4c6} # Script extension check /^\p{Yi}/utf - \x{3001} + \x{3001} /^\p{scx=Yiii}/utf - \x{ff65} + \x{ff65} # Script extension only character /^\p{Yi}/utf - \x{3001} + \x{3001} /^\p{sc=Yi}/utf - \x{3001} + 
\x{3001} # Character not in script /^\p{Yi}/utf - \x{ff66} + \x{ff66} # Base script check /^\p{sc=Tagalog}/utf - \x{1700} + \x{1700} /^\p{Script=Tglg}/utf - \x{171f} + \x{171f} # Script extension check /^\p{Tagalog}/utf - \x{1735} + \x{1735} /^\p{Script_Extensions=Tglg}/utf - \x{1736} + \x{1736} # Script extension only character /^\p{Tagalog}/utf - \x{1735} + \x{1735} /^\p{sc=Tagalog}/utf - \x{1735} + \x{1735} # Character not in script /^\p{Tagalog}/utf - \x{1737} + \x{1737} # Base script check /^\p{sc=Hanunoo}/utf - \x{1720} + \x{1720} /^\p{Script=Hano}/utf - \x{1734} + \x{1734} # Script extension check /^\p{Hanunoo}/utf - \x{1735} + \x{1735} /^\p{scx=Hano}/utf - \x{1736} + \x{1736} # Script extension only character /^\p{Hanunoo}/utf - \x{1735} + \x{1735} /^\p{sc=Hanunoo}/utf - \x{1735} + \x{1735} # Character not in script /^\p{Hanunoo}/utf - \x{1737} + \x{1737} # Base script check /^\p{sc=Buhid}/utf - \x{1740} + \x{1740} /^\p{Script=Buhd}/utf - \x{1753} + \x{1753} # Script extension check /^\p{Buhid}/utf - \x{1735} + \x{1735} /^\p{Script_Extensions=Buhd}/utf - \x{1736} + \x{1736} # Script extension only character /^\p{Buhid}/utf - \x{1735} + \x{1735} /^\p{sc=Buhid}/utf - \x{1735} + \x{1735} # Character not in script /^\p{Buhid}/utf - \x{1754} + \x{1754} # Base script check /^\p{sc=Tagbanwa}/utf - \x{1760} + \x{1760} /^\p{Script=Tagb}/utf - \x{1773} + \x{1773} # Script extension check /^\p{Tagbanwa}/utf - \x{1735} + \x{1735} /^\p{scx=Tagb}/utf - \x{1736} + \x{1736} # Script extension only character /^\p{Tagbanwa}/utf - \x{1735} + \x{1735} /^\p{sc=Tagbanwa}/utf - \x{1735} + \x{1735} # Character not in script /^\p{Tagbanwa}/utf - \x{1774} + \x{1774} # Base script check /^\p{sc=Limbu}/utf - \x{1900} + \x{1900} /^\p{Script=Limb}/utf - \x{194f} + \x{194f} # Script extension check /^\p{Limbu}/utf - \x{965} + \x{965} /^\p{Script_Extensions=Limb}/utf - \x{965} + \x{965} # Script extension only character /^\p{Limbu}/utf - \x{965} + \x{965} /^\p{sc=Limbu}/utf - \x{965} + 
\x{965} # Character not in script /^\p{Limbu}/utf - \x{1950} + \x{1950} # Base script check /^\p{sc=Tai_Le}/utf - \x{1950} + \x{1950} /^\p{Script=Tale}/utf - \x{1974} + \x{1974} # Script extension check /^\p{Tai_Le}/utf - \x{1040} + \x{1040} /^\p{scx=Tale}/utf - \x{1049} + \x{1049} # Script extension only character /^\p{Tai_Le}/utf - \x{1040} + \x{1040} /^\p{sc=Tai_Le}/utf - \x{1040} + \x{1040} # Character not in script /^\p{Tai_Le}/utf - \x{1975} + \x{1975} # Base script check /^\p{sc=Linear_B}/utf - \x{10000} + \x{10000} /^\p{Script=Linb}/utf - \x{100fa} + \x{100fa} # Script extension check /^\p{Linear_B}/utf - \x{10100} + \x{10100} /^\p{Script_Extensions=Linb}/utf - \x{1013f} + \x{1013f} # Script extension only character /^\p{Linear_B}/utf - \x{10102} + \x{10102} /^\p{sc=Linear_B}/utf - \x{10102} + \x{10102} # Character not in script /^\p{Linear_B}/utf - \x{10140} + \x{10140} # Base script check /^\p{sc=Cypriot}/utf - \x{10800} + \x{10800} /^\p{Script=Cprt}/utf - \x{1083f} + \x{1083f} # Script extension check /^\p{Cypriot}/utf - \x{10100} + \x{10100} /^\p{scx=Cprt}/utf - \x{1013f} + \x{1013f} # Script extension only character /^\p{Cypriot}/utf - \x{10102} + \x{10102} /^\p{sc=Cypriot}/utf - \x{10102} + \x{10102} # Character not in script /^\p{Cypriot}/utf - \x{10840} + \x{10840} # Base script check /^\p{sc=Buginese}/utf - \x{1a00} + \x{1a00} /^\p{Script=Bugi}/utf - \x{1a1f} + \x{1a1f} # Script extension check /^\p{Buginese}/utf - \x{a9cf} + \x{a9cf} /^\p{Script_Extensions=Bugi}/utf - \x{a9cf} + \x{a9cf} # Script extension only character /^\p{Buginese}/utf - \x{a9cf} + \x{a9cf} /^\p{sc=Buginese}/utf - \x{a9cf} + \x{a9cf} # Character not in script /^\p{Buginese}/utf - \x{a9d0} + \x{a9d0} # Base script check /^\p{sc=Coptic}/utf - \x{3e2} + \x{3e2} /^\p{Script=Copt}/utf - \x{2cff} + \x{2cff} # Script extension check /^\p{Coptic}/utf - \x{102e0} + \x{102e0} /^\p{scx=Copt}/utf - \x{102fb} + \x{102fb} # Script extension only character /^\p{Coptic}/utf - \x{102e0} + 
\x{102e0} /^\p{sc=Coptic}/utf - \x{102e0} + \x{102e0} # Character not in script /^\p{Coptic}/utf - \x{102fc} + \x{102fc} # Base script check /^\p{sc=Glagolitic}/utf - \x{2c00} + \x{2c00} /^\p{Script=Glag}/utf - \x{1e02a} + \x{1e02a} # Script extension check /^\p{Glagolitic}/utf - \x{484} + \x{484} /^\p{Script_Extensions=Glag}/utf - \x{a66f} + \x{a66f} # Script extension only character /^\p{Glagolitic}/utf - \x{484} + \x{484} /^\p{sc=Glagolitic}/utf - \x{484} + \x{484} # Character not in script /^\p{Glagolitic}/utf - \x{1e02b} + \x{1e02b} # Base script check /^\p{sc=Syloti_Nagri}/utf - \x{a800} + \x{a800} /^\p{Script=Sylo}/utf - \x{a82c} + \x{a82c} # Script extension check /^\p{Syloti_Nagri}/utf - \x{964} + \x{964} /^\p{scx=Sylo}/utf - \x{9ef} + \x{9ef} # Script extension only character /^\p{Syloti_Nagri}/utf - \x{9e6} + \x{9e6} /^\p{sc=Syloti_Nagri}/utf - \x{9e6} + \x{9e6} # Character not in script /^\p{Syloti_Nagri}/utf - \x{a82d} + \x{a82d} # Base script check /^\p{sc=Phags_Pa}/utf - \x{a840} + \x{a840} /^\p{Script=Phag}/utf - \x{a877} + \x{a877} # Script extension check /^\p{Phags_Pa}/utf - \x{1802} + \x{1802} /^\p{Script_Extensions=Phag}/utf - \x{1805} + \x{1805} # Script extension only character /^\p{Phags_Pa}/utf - \x{1802} + \x{1802} /^\p{sc=Phags_Pa}/utf - \x{1802} + \x{1802} # Character not in script /^\p{Phags_Pa}/utf - \x{a878} + \x{a878} # Base script check /^\p{sc=Nko}/utf - \x{7c0} + \x{7c0} /^\p{Script=Nkoo}/utf - \x{7ff} + \x{7ff} # Script extension check /^\p{Nko}/utf - \x{60c} + \x{60c} /^\p{scx=Nkoo}/utf - \x{fd3f} + \x{fd3f} # Script extension only character /^\p{Nko}/utf - \x{fd3e} + \x{fd3e} /^\p{sc=Nko}/utf - \x{fd3e} + \x{fd3e} # Character not in script /^\p{Nko}/utf - \x{fd40} + \x{fd40} # Base script check /^\p{sc=Kayah_Li}/utf - \x{a900} + \x{a900} /^\p{Script=Kali}/utf - \x{a92f} + \x{a92f} # Script extension check /^\p{Kayah_Li}/utf - \x{a92e} + \x{a92e} /^\p{Script_Extensions=Kali}/utf - \x{a92e} + \x{a92e} # Script extension only 
character /^\p{Kayah_Li}/utf - \x{a92e} + \x{a92e} /^\p{sc=Kayah_Li}/utf - \x{a92e} + \x{a92e} # Character not in script /^\p{Kayah_Li}/utf - \x{a930} + \x{a930} # Base script check /^\p{sc=Javanese}/utf - \x{a980} + \x{a980} /^\p{Script=Java}/utf - \x{a9df} + \x{a9df} # Script extension check /^\p{Javanese}/utf - \x{a9cf} + \x{a9cf} /^\p{scx=Java}/utf - \x{a9cf} + \x{a9cf} # Script extension only character /^\p{Javanese}/utf - \x{a9cf} + \x{a9cf} /^\p{sc=Javanese}/utf - \x{a9cf} + \x{a9cf} # Character not in script /^\p{Javanese}/utf - \x{a9e0} + \x{a9e0} # Base script check /^\p{sc=Kaithi}/utf - \x{11080} + \x{11080} /^\p{Script=Kthi}/utf - \x{110cd} + \x{110cd} # Script extension check /^\p{Kaithi}/utf - \x{966} + \x{966} /^\p{Script_Extensions=Kthi}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Kaithi}/utf - \x{966} + \x{966} /^\p{sc=Kaithi}/utf - \x{966} + \x{966} # Character not in script /^\p{Kaithi}/utf - \x{110ce} + \x{110ce} # Base script check /^\p{sc=Mandaic}/utf - \x{840} + \x{840} /^\p{Script=Mand}/utf - \x{85e} + \x{85e} # Script extension check /^\p{Mandaic}/utf - \x{640} + \x{640} /^\p{scx=Mand}/utf - \x{640} + \x{640} # Script extension only character /^\p{Mandaic}/utf - \x{640} + \x{640} /^\p{sc=Mandaic}/utf - \x{640} + \x{640} # Character not in script /^\p{Mandaic}/utf - \x{85f} + \x{85f} # Base script check /^\p{sc=Chakma}/utf - \x{11100} + \x{11100} /^\p{Script=Cakm}/utf - \x{11147} + \x{11147} # Script extension check /^\p{Chakma}/utf - \x{9e6} + \x{9e6} /^\p{Script_Extensions=Cakm}/utf - \x{1049} + \x{1049} # Script extension only character /^\p{Chakma}/utf - \x{9e6} + \x{9e6} /^\p{sc=Chakma}/utf - \x{9e6} + \x{9e6} # Character not in script /^\p{Chakma}/utf - \x{11148} + \x{11148} # Base script check /^\p{sc=Sharada}/utf - \x{11180} + \x{11180} /^\p{Script=Shrd}/utf - \x{111df} + \x{111df} # Script extension check /^\p{Sharada}/utf - \x{951} + \x{951} /^\p{scx=Shrd}/utf - \x{1ce0} + \x{1ce0} # Script extension only 
character /^\p{Sharada}/utf - \x{1cd7} + \x{1cd7} /^\p{sc=Sharada}/utf - \x{1cd7} + \x{1cd7} # Character not in script /^\p{Sharada}/utf - \x{111e0} + \x{111e0} # Base script check /^\p{sc=Takri}/utf - \x{11680} + \x{11680} /^\p{Script=Takr}/utf - \x{116c9} + \x{116c9} # Script extension check /^\p{Takri}/utf - \x{964} + \x{964} /^\p{Script_Extensions=Takr}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Takri}/utf - \x{a836} + \x{a836} /^\p{sc=Takri}/utf - \x{a836} + \x{a836} # Character not in script /^\p{Takri}/utf - \x{116ca} + \x{116ca} # Base script check /^\p{sc=Duployan}/utf - \x{1bc00} + \x{1bc00} /^\p{Script=Dupl}/utf - \x{1bc9f} + \x{1bc9f} # Script extension check /^\p{Duployan}/utf - \x{1bca0} + \x{1bca0} /^\p{scx=Dupl}/utf - \x{1bca3} + \x{1bca3} # Script extension only character /^\p{Duployan}/utf - \x{1bca0} + \x{1bca0} /^\p{sc=Duployan}/utf - \x{1bca0} + \x{1bca0} # Character not in script /^\p{Duployan}/utf - \x{1bca4} + \x{1bca4} # Base script check /^\p{sc=Grantha}/utf - \x{11300} + \x{11300} /^\p{Script=Gran}/utf - \x{11374} + \x{11374} # Script extension check /^\p{Grantha}/utf - \x{951} + \x{951} /^\p{Script_Extensions=Gran}/utf - \x{11fd3} + \x{11fd3} # Script extension only character /^\p{Grantha}/utf - \x{1cd3} + \x{1cd3} /^\p{sc=Grantha}/utf - \x{1cd3} + \x{1cd3} # Character not in script /^\p{Grantha}/utf - \x{11fd4} + \x{11fd4} # Base script check /^\p{sc=Khojki}/utf - \x{11200} + \x{11200} /^\p{Script=Khoj}/utf - \x{11241} + \x{11241} # Script extension check /^\p{Khojki}/utf - \x{ae6} + \x{ae6} /^\p{scx=Khoj}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Khojki}/utf - \x{ae6} + \x{ae6} /^\p{sc=Khojki}/utf - \x{ae6} + \x{ae6} # Character not in script /^\p{Khojki}/utf - \x{11242} + \x{11242} # Base script check /^\p{sc=Linear_A}/utf - \x{10600} + \x{10600} /^\p{Script=Lina}/utf - \x{10767} + \x{10767} # Script extension check /^\p{Linear_A}/utf - \x{10107} + \x{10107} /^\p{Script_Extensions=Lina}/utf 
- \x{10133} + \x{10133} # Script extension only character /^\p{Linear_A}/utf - \x{10107} + \x{10107} /^\p{sc=Linear_A}/utf - \x{10107} + \x{10107} # Character not in script /^\p{Linear_A}/utf - \x{10768} + \x{10768} # Base script check /^\p{sc=Mahajani}/utf - \x{11150} + \x{11150} /^\p{Script=Mahj}/utf - \x{11176} + \x{11176} # Script extension check /^\p{Mahajani}/utf - \x{964} + \x{964} /^\p{scx=Mahj}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Mahajani}/utf - \x{966} + \x{966} /^\p{sc=Mahajani}/utf - \x{966} + \x{966} # Character not in script /^\p{Mahajani}/utf - \x{11177} + \x{11177} # Base script check /^\p{sc=Manichaean}/utf - \x{10ac0} + \x{10ac0} /^\p{Script=Mani}/utf - \x{10af6} + \x{10af6} # Script extension check /^\p{Manichaean}/utf - \x{640} + \x{640} /^\p{Script_Extensions=Mani}/utf - \x{10af2} + \x{10af2} # Script extension only character /^\p{Manichaean}/utf - \x{640} + \x{640} /^\p{sc=Manichaean}/utf - \x{640} + \x{640} # Character not in script /^\p{Manichaean}/utf - \x{10af7} + \x{10af7} # Base script check /^\p{sc=Modi}/utf - \x{11600} + \x{11600} /^\p{Script=Modi}/utf - \x{11659} + \x{11659} # Script extension check /^\p{Modi}/utf - \x{a830} + \x{a830} /^\p{scx=Modi}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Modi}/utf - \x{a836} + \x{a836} /^\p{sc=Modi}/utf - \x{a836} + \x{a836} # Character not in script /^\p{Modi}/utf - \x{1165a} + \x{1165a} # Base script check /^\p{sc=Old_Permic}/utf - \x{10350} + \x{10350} /^\p{Script=Perm}/utf - \x{1037a} + \x{1037a} # Script extension check /^\p{Old_Permic}/utf - \x{483} + \x{483} /^\p{Script_Extensions=Perm}/utf - \x{483} + \x{483} # Script extension only character /^\p{Old_Permic}/utf - \x{483} + \x{483} /^\p{sc=Old_Permic}/utf - \x{483} + \x{483} # Character not in script /^\p{Old_Permic}/utf - \x{1037b} + \x{1037b} # Base script check /^\p{sc=Psalter_Pahlavi}/utf - \x{10b80} + \x{10b80} /^\p{Script=Phlp}/utf - \x{10baf} + \x{10baf} # Script extension check 
/^\p{Psalter_Pahlavi}/utf - \x{640} + \x{640} /^\p{scx=Phlp}/utf - \x{640} + \x{640} # Script extension only character /^\p{Psalter_Pahlavi}/utf - \x{640} + \x{640} /^\p{sc=Psalter_Pahlavi}/utf - \x{640} + \x{640} # Character not in script /^\p{Psalter_Pahlavi}/utf - \x{10bb0} + \x{10bb0} # Base script check /^\p{sc=Khudawadi}/utf - \x{112b0} + \x{112b0} /^\p{Script=Sind}/utf - \x{112f9} + \x{112f9} # Script extension check /^\p{Khudawadi}/utf - \x{964} + \x{964} /^\p{Script_Extensions=Sind}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Khudawadi}/utf - \x{a836} + \x{a836} /^\p{sc=Khudawadi}/utf - \x{a836} + \x{a836} # Character not in script /^\p{Khudawadi}/utf - \x{112fa} + \x{112fa} # Base script check /^\p{sc=Tirhuta}/utf - \x{11480} + \x{11480} /^\p{Script=Tirh}/utf - \x{114d9} + \x{114d9} # Script extension check /^\p{Tirhuta}/utf - \x{951} + \x{951} /^\p{scx=Tirh}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Tirhuta}/utf - \x{1cf2} + \x{1cf2} /^\p{sc=Tirhuta}/utf - \x{1cf2} + \x{1cf2} # Character not in script /^\p{Tirhuta}/utf - \x{114da} + \x{114da} # Base script check /^\p{sc=Multani}/utf - \x{11280} + \x{11280} /^\p{Script=Mult}/utf - \x{112a9} + \x{112a9} # Script extension check /^\p{Multani}/utf - \x{a66} + \x{a66} /^\p{Script_Extensions=Mult}/utf - \x{a6f} + \x{a6f} # Script extension only character /^\p{Multani}/utf - \x{a66} + \x{a66} /^\p{sc=Multani}/utf - \x{a66} + \x{a66} # Character not in script /^\p{Multani}/utf - \x{112aa} + \x{112aa} # Base script check /^\p{sc=Adlam}/utf - \x{1e900} + \x{1e900} /^\p{Script=Adlm}/utf - \x{1e95f} + \x{1e95f} # Script extension check /^\p{Adlam}/utf - \x{61f} + \x{61f} /^\p{scx=Adlm}/utf - \x{640} + \x{640} # Script extension only character /^\p{Adlam}/utf - \x{61f} + \x{61f} /^\p{sc=Adlam}/utf - \x{61f} + \x{61f} # Character not in script /^\p{Adlam}/utf - \x{1e960} + \x{1e960} # Base script check /^\p{sc=Masaram_Gondi}/utf - \x{11d00} + \x{11d00} /^\p{Script=Gonm}/utf 
- \x{11d59} + \x{11d59} # Script extension check /^\p{Masaram_Gondi}/utf - \x{964} + \x{964} /^\p{Script_Extensions=Gonm}/utf - \x{965} + \x{965} # Script extension only character /^\p{Masaram_Gondi}/utf - \x{964} + \x{964} /^\p{sc=Masaram_Gondi}/utf - \x{964} + \x{964} # Character not in script /^\p{Masaram_Gondi}/utf - \x{11d5a} + \x{11d5a} # Base script check /^\p{sc=Dogra}/utf - \x{11800} + \x{11800} /^\p{Script=Dogr}/utf - \x{1183b} + \x{1183b} # Script extension check /^\p{Dogra}/utf - \x{964} + \x{964} /^\p{scx=Dogr}/utf - \x{a839} + \x{a839} # Script extension only character /^\p{Dogra}/utf - \x{966} + \x{966} /^\p{sc=Dogra}/utf - \x{966} + \x{966} # Character not in script /^\p{Dogra}/utf - \x{1183c} + \x{1183c} # Base script check /^\p{sc=Gunjala_Gondi}/utf - \x{11d60} + \x{11d60} /^\p{Script=Gong}/utf - \x{11da9} + \x{11da9} # Script extension check /^\p{Gunjala_Gondi}/utf - \x{964} + \x{964} /^\p{Script_Extensions=Gong}/utf - \x{965} + \x{965} # Script extension only character /^\p{Gunjala_Gondi}/utf - \x{964} + \x{964} /^\p{sc=Gunjala_Gondi}/utf - \x{964} + \x{964} # Character not in script /^\p{Gunjala_Gondi}/utf - \x{11daa} + \x{11daa} # Base script check /^\p{sc=Hanifi_Rohingya}/utf - \x{10d00} + \x{10d00} /^\p{Script=Rohg}/utf - \x{10d39} + \x{10d39} # Script extension check /^\p{Hanifi_Rohingya}/utf - \x{60c} + \x{60c} /^\p{scx=Rohg}/utf - \x{6d4} + \x{6d4} # Script extension only character /^\p{Hanifi_Rohingya}/utf - \x{6d4} + \x{6d4} /^\p{sc=Hanifi_Rohingya}/utf - \x{6d4} + \x{6d4} # Character not in script /^\p{Hanifi_Rohingya}/utf - \x{10d3a} + \x{10d3a} # Base script check /^\p{sc=Sogdian}/utf - \x{10f30} + \x{10f30} /^\p{Script=Sogd}/utf - \x{10f59} + \x{10f59} # Script extension check /^\p{Sogdian}/utf - \x{640} + \x{640} /^\p{Script_Extensions=Sogd}/utf - \x{640} + \x{640} # Script extension only character /^\p{Sogdian}/utf - \x{640} + \x{640} /^\p{sc=Sogdian}/utf - \x{640} + \x{640} # Character not in script /^\p{Sogdian}/utf - \x{10f5a} 
+ \x{10f5a} # Base script check /^\p{sc=Nandinagari}/utf - \x{119a0} + \x{119a0} /^\p{Script=Nand}/utf - \x{119e4} + \x{119e4} # Script extension check /^\p{Nandinagari}/utf - \x{964} + \x{964} /^\p{scx=Nand}/utf - \x{a835} + \x{a835} # Script extension only character /^\p{Nandinagari}/utf - \x{1cfa} + \x{1cfa} /^\p{sc=Nandinagari}/utf - \x{1cfa} + \x{1cfa} # Character not in script /^\p{Nandinagari}/utf - \x{119e5} + \x{119e5} # Base script check /^\p{sc=Yezidi}/utf - \x{10e80} + \x{10e80} /^\p{Script=Yezi}/utf - \x{10eb1} + \x{10eb1} # Script extension check /^\p{Yezidi}/utf - \x{60c} + \x{60c} /^\p{Script_Extensions=Yezi}/utf - \x{669} + \x{669} # Script extension only character /^\p{Yezidi}/utf - \x{660} + \x{660} /^\p{sc=Yezidi}/utf - \x{660} + \x{660} # Character not in script /^\p{Yezidi}/utf - \x{10eb2} + \x{10eb2} # Base script check /^\p{sc=Cypro_Minoan}/utf - \x{12f90} + \x{12f90} /^\p{Script=Cpmn}/utf - \x{12ff2} + \x{12ff2} # Script extension check /^\p{Cypro_Minoan}/utf - \x{10100} + \x{10100} /^\p{scx=Cpmn}/utf - \x{10101} + \x{10101} # Script extension only character /^\p{Cypro_Minoan}/utf - \x{10100} + \x{10100} /^\p{sc=Cypro_Minoan}/utf - \x{10100} + \x{10100} # Character not in script /^\p{Cypro_Minoan}/utf - \x{12ff3} + \x{12ff3} # Base script check /^\p{sc=Old_Uyghur}/utf - \x{10f70} + \x{10f70} /^\p{Script=Ougr}/utf - \x{10f89} + \x{10f89} # Script extension check /^\p{Old_Uyghur}/utf - \x{640} + \x{640} /^\p{Script_Extensions=Ougr}/utf - \x{10af2} + \x{10af2} # Script extension only character /^\p{Old_Uyghur}/utf - \x{10af2} + \x{10af2} /^\p{sc=Old_Uyghur}/utf - \x{10af2} + \x{10af2} # Character not in script /^\p{Old_Uyghur}/utf - \x{10f8a} + \x{10f8a} # Base script check /^\p{sc=Common}/utf - \x{00} + \x{00} /^\p{Script=Zyyy}/utf - \x{e007f} + \x{e007f} # Character not in script /^\p{Common}/utf - \x{e0080} + \x{e0080} # Base script check /^\p{sc=Armenian}/utf - \x{531} + \x{531} /^\p{Script=Armn}/utf - \x{fb17} + \x{fb17} # Character not 
in script /^\p{Armenian}/utf - \x{fb18} + \x{fb18} # Base script check /^\p{sc=Hebrew}/utf - \x{591} + \x{591} /^\p{Script=Hebr}/utf - \x{fb4f} + \x{fb4f} # Character not in script /^\p{Hebrew}/utf - \x{fb50} + \x{fb50} # Base script check /^\p{sc=Thai}/utf - \x{e01} + \x{e01} /^\p{Script=Thai}/utf - \x{e5b} + \x{e5b} # Character not in script /^\p{Thai}/utf - \x{e5c} + \x{e5c} # Base script check /^\p{sc=Lao}/utf - \x{e81} + \x{e81} /^\p{Script=Laoo}/utf - \x{edf} + \x{edf} # Character not in script /^\p{Lao}/utf - \x{ee0} + \x{ee0} # Base script check /^\p{sc=Tibetan}/utf - \x{f00} + \x{f00} /^\p{Script=Tibt}/utf - \x{fda} + \x{fda} # Character not in script /^\p{Tibetan}/utf - \x{fdb} + \x{fdb} # Base script check /^\p{sc=Ethiopic}/utf - \x{1200} + \x{1200} /^\p{Script=Ethi}/utf - \x{1e7fe} + \x{1e7fe} # Character not in script /^\p{Ethiopic}/utf - \x{1e7ff} + \x{1e7ff} # Base script check /^\p{sc=Cherokee}/utf - \x{13a0} + \x{13a0} /^\p{Script=Cher}/utf - \x{abbf} + \x{abbf} # Character not in script /^\p{Cherokee}/utf - \x{abc0} + \x{abc0} # Base script check /^\p{sc=Canadian_Aboriginal}/utf - \x{1400} + \x{1400} /^\p{Script=Cans}/utf - \x{11abf} + \x{11abf} # Character not in script /^\p{Canadian_Aboriginal}/utf - \x{11ac0} + \x{11ac0} # Base script check /^\p{sc=Ogham}/utf - \x{1680} + \x{1680} /^\p{Script=Ogam}/utf - \x{169c} + \x{169c} # Character not in script /^\p{Ogham}/utf - \x{169d} + \x{169d} # Base script check /^\p{sc=Runic}/utf - \x{16a0} + \x{16a0} /^\p{Script=Runr}/utf - \x{16f8} + \x{16f8} # Character not in script /^\p{Runic}/utf - \x{16f9} + \x{16f9} # Base script check /^\p{sc=Khmer}/utf - \x{1780} + \x{1780} /^\p{Script=Khmr}/utf - \x{19ff} + \x{19ff} # Character not in script /^\p{Khmer}/utf - \x{1a00} + \x{1a00} # Base script check /^\p{sc=Old_Italic}/utf - \x{10300} + \x{10300} /^\p{Script=Ital}/utf - \x{1032f} + \x{1032f} # Character not in script /^\p{Old_Italic}/utf - \x{10330} + \x{10330} # Base script check /^\p{sc=Gothic}/utf - 
\x{10330} + \x{10330} /^\p{Script=Goth}/utf - \x{1034a} + \x{1034a} # Character not in script /^\p{Gothic}/utf - \x{1034b} + \x{1034b} # Base script check /^\p{sc=Deseret}/utf - \x{10400} + \x{10400} /^\p{Script=Dsrt}/utf - \x{1044f} + \x{1044f} # Character not in script /^\p{Deseret}/utf - \x{10450} + \x{10450} # Base script check /^\p{sc=Inherited}/utf - \x{300} + \x{300} /^\p{Script=Zinh}/utf - \x{e01ef} + \x{e01ef} # Character not in script /^\p{Inherited}/utf - \x{e01f0} + \x{e01f0} # Base script check /^\p{sc=Ugaritic}/utf - \x{10380} + \x{10380} /^\p{Script=Ugar}/utf - \x{1039f} + \x{1039f} # Character not in script /^\p{Ugaritic}/utf - \x{103a0} + \x{103a0} # Base script check /^\p{sc=Shavian}/utf - \x{10450} + \x{10450} /^\p{Script=Shaw}/utf - \x{1047f} + \x{1047f} # Character not in script /^\p{Shavian}/utf - \x{10480} + \x{10480} # Base script check /^\p{sc=Osmanya}/utf - \x{10480} + \x{10480} /^\p{Script=Osma}/utf - \x{104a9} + \x{104a9} # Character not in script /^\p{Osmanya}/utf - \x{104aa} + \x{104aa} # Base script check /^\p{sc=Braille}/utf - \x{2800} + \x{2800} /^\p{Script=Brai}/utf - \x{28ff} + \x{28ff} # Character not in script /^\p{Braille}/utf - \x{2900} + \x{2900} # Base script check /^\p{sc=New_Tai_Lue}/utf - \x{1980} + \x{1980} /^\p{Script=Talu}/utf - \x{19df} + \x{19df} # Character not in script /^\p{New_Tai_Lue}/utf - \x{19e0} + \x{19e0} # Base script check /^\p{sc=Tifinagh}/utf - \x{2d30} + \x{2d30} /^\p{Script=Tfng}/utf - \x{2d7f} + \x{2d7f} # Character not in script /^\p{Tifinagh}/utf - \x{2d80} + \x{2d80} # Base script check /^\p{sc=Old_Persian}/utf - \x{103a0} + \x{103a0} /^\p{Script=Xpeo}/utf - \x{103d5} + \x{103d5} # Character not in script /^\p{Old_Persian}/utf - \x{103d6} + \x{103d6} # Base script check /^\p{sc=Kharoshthi}/utf - \x{10a00} + \x{10a00} /^\p{Script=Khar}/utf - \x{10a58} + \x{10a58} # Character not in script /^\p{Kharoshthi}/utf - \x{10a59} + \x{10a59} # Base script check /^\p{sc=Balinese}/utf - \x{1b00} + \x{1b00} 
/^\p{Script=Bali}/utf - \x{1b7e} + \x{1b7e} # Character not in script /^\p{Balinese}/utf - \x{1b7f} + \x{1b8f} # Base script check /^\p{sc=Cuneiform}/utf - \x{12000} + \x{12000} /^\p{Script=Xsux}/utf - \x{12543} + \x{12543} # Character not in script /^\p{Cuneiform}/utf - \x{12544} + \x{12544} # Base script check /^\p{sc=Phoenician}/utf - \x{10900} + \x{10900} /^\p{Script=Phnx}/utf - \x{1091f} + \x{1091f} # Character not in script /^\p{Phoenician}/utf - \x{10920} + \x{10920} # Base script check /^\p{sc=Sundanese}/utf - \x{1b80} + \x{1b80} /^\p{Script=Sund}/utf - \x{1cc7} + \x{1cc7} # Character not in script /^\p{Sundanese}/utf - \x{1cc8} + \x{1cc8} # Base script check /^\p{sc=Lepcha}/utf - \x{1c00} + \x{1c00} /^\p{Script=Lepc}/utf - \x{1c4f} + \x{1c4f} # Character not in script /^\p{Lepcha}/utf - \x{1c50} + \x{1c50} # Base script check /^\p{sc=Ol_Chiki}/utf - \x{1c50} + \x{1c50} /^\p{Script=Olck}/utf - \x{1c7f} + \x{1c7f} # Character not in script /^\p{Ol_Chiki}/utf - \x{1c80} + \x{1c80} # Base script check /^\p{sc=Vai}/utf - \x{a500} + \x{a500} /^\p{Script=Vaii}/utf - \x{a62b} + \x{a62b} # Character not in script /^\p{Vai}/utf - \x{a62c} + \x{a62c} # Base script check /^\p{sc=Saurashtra}/utf - \x{a880} + \x{a880} /^\p{Script=Saur}/utf - \x{a8d9} + \x{a8d9} # Character not in script /^\p{Saurashtra}/utf - \x{a8da} + \x{a8da} # Base script check /^\p{sc=Rejang}/utf - \x{a930} + \x{a930} /^\p{Script=Rjng}/utf - \x{a95f} + \x{a95f} # Character not in script /^\p{Rejang}/utf - \x{a960} + \x{a960} # Base script check /^\p{sc=Lycian}/utf - \x{10280} + \x{10280} /^\p{Script=Lyci}/utf - \x{1029c} + \x{1029c} # Character not in script /^\p{Lycian}/utf - \x{1029d} + \x{1029d} # Base script check /^\p{sc=Carian}/utf - \x{102a0} + \x{102a0} /^\p{Script=Cari}/utf - \x{102d0} + \x{102d0} # Character not in script /^\p{Carian}/utf - \x{102d1} + \x{102d1} # Base script check /^\p{sc=Lydian}/utf - \x{10920} + \x{10920} /^\p{Script=Lydi}/utf - \x{1093f} + \x{1093f} # Character not in 
script /^\p{Lydian}/utf - \x{10940} + \x{10940} # Base script check /^\p{sc=Cham}/utf - \x{aa00} + \x{aa00} /^\p{Script=Cham}/utf - \x{aa5f} + \x{aa5f} # Character not in script /^\p{Cham}/utf - \x{aa60} + \x{aa60} # Base script check /^\p{sc=Tai_Tham}/utf - \x{1a20} + \x{1a20} /^\p{Script=Lana}/utf - \x{1aad} + \x{1aad} # Character not in script /^\p{Tai_Tham}/utf - \x{1aae} + \x{1aae} # Base script check /^\p{sc=Tai_Viet}/utf - \x{aa80} + \x{aa80} /^\p{Script=Tavt}/utf - \x{aadf} + \x{aadf} # Character not in script /^\p{Tai_Viet}/utf - \x{aae0} + \x{aae0} # Base script check /^\p{sc=Avestan}/utf - \x{10b00} + \x{10b00} /^\p{Script=Avst}/utf - \x{10b3f} + \x{10b3f} # Character not in script /^\p{Avestan}/utf - \x{10b40} + \x{10b40} # Base script check /^\p{sc=Egyptian_Hieroglyphs}/utf - \x{13000} + \x{13000} /^\p{Script=Egyp}/utf - \x{13455} + \x{13455} # Character not in script /^\p{Egyptian_Hieroglyphs}/utf - \x{13456} + \x{13456} # Base script check /^\p{sc=Samaritan}/utf - \x{800} + \x{800} /^\p{Script=Samr}/utf - \x{83e} + \x{83e} # Character not in script /^\p{Samaritan}/utf - \x{83f} + \x{83f} # Base script check /^\p{sc=Lisu}/utf - \x{a4d0} + \x{a4d0} /^\p{Script=Lisu}/utf - \x{11fb0} + \x{11fb0} # Character not in script /^\p{Lisu}/utf - \x{11fb1} + \x{11fb1} # Base script check /^\p{sc=Bamum}/utf - \x{a6a0} + \x{a6a0} /^\p{Script=Bamu}/utf - \x{16a38} + \x{16a38} # Character not in script /^\p{Bamum}/utf - \x{16a39} + \x{16a39} # Base script check /^\p{sc=Meetei_Mayek}/utf - \x{aae0} + \x{aae0} /^\p{Script=Mtei}/utf - \x{abf9} + \x{abf9} # Character not in script /^\p{Meetei_Mayek}/utf - \x{abfa} + \x{abfa} # Base script check /^\p{sc=Imperial_Aramaic}/utf - \x{10840} + \x{10840} /^\p{Script=Armi}/utf - \x{1085f} + \x{1085f} # Character not in script /^\p{Imperial_Aramaic}/utf - \x{10860} + \x{10860} # Base script check /^\p{sc=Old_South_Arabian}/utf - \x{10a60} + \x{10a60} /^\p{Script=Sarb}/utf - \x{10a7f} + \x{10a7f} # Character not in script 
/^\p{Old_South_Arabian}/utf - \x{10a80} + \x{10a80} # Base script check /^\p{sc=Inscriptional_Parthian}/utf - \x{10b40} + \x{10b40} /^\p{Script=Prti}/utf - \x{10b5f} + \x{10b5f} # Character not in script /^\p{Inscriptional_Parthian}/utf - \x{10b60} + \x{10b60} # Base script check /^\p{sc=Inscriptional_Pahlavi}/utf - \x{10b60} + \x{10b60} /^\p{Script=Phli}/utf - \x{10b7f} + \x{10b7f} # Character not in script /^\p{Inscriptional_Pahlavi}/utf - \x{10b80} + \x{10b80} # Base script check /^\p{sc=Old_Turkic}/utf - \x{10c00} + \x{10c00} /^\p{Script=Orkh}/utf - \x{10c48} + \x{10c48} # Character not in script /^\p{Old_Turkic}/utf - \x{10c49} + \x{10c49} # Base script check /^\p{sc=Batak}/utf - \x{1bc0} + \x{1bc0} /^\p{Script=Batk}/utf - \x{1bff} + \x{1bff} # Character not in script /^\p{Batak}/utf - \x{1c00} + \x{1c00} # Base script check /^\p{sc=Brahmi}/utf - \x{11000} + \x{11000} /^\p{Script=Brah}/utf - \x{1107f} + \x{1107f} # Character not in script /^\p{Brahmi}/utf - \x{11080} + \x{11080} # Base script check /^\p{sc=Meroitic_Cursive}/utf - \x{109a0} + \x{109a0} /^\p{Script=Merc}/utf - \x{109ff} + \x{109ff} # Character not in script /^\p{Meroitic_Cursive}/utf - \x{10a00} + \x{10a00} # Base script check /^\p{sc=Meroitic_Hieroglyphs}/utf - \x{10980} + \x{10980} /^\p{Script=Mero}/utf - \x{1099f} + \x{1099f} # Character not in script /^\p{Meroitic_Hieroglyphs}/utf - \x{109a0} + \x{109a0} # Base script check /^\p{sc=Miao}/utf - \x{16f00} + \x{16f00} /^\p{Script=Plrd}/utf - \x{16f9f} + \x{16f9f} # Character not in script /^\p{Miao}/utf - \x{16fa0} + \x{16fa0} # Base script check /^\p{sc=Sora_Sompeng}/utf - \x{110d0} + \x{110d0} /^\p{Script=Sora}/utf - \x{110f9} + \x{110f9} # Character not in script /^\p{Sora_Sompeng}/utf - \x{110fa} + \x{110fa} # Base script check /^\p{sc=Caucasian_Albanian}/utf - \x{10530} + \x{10530} /^\p{Script=Aghb}/utf - \x{1056f} + \x{1056f} # Character not in script /^\p{Caucasian_Albanian}/utf - \x{10570} + \x{10570} # Base script check 
/^\p{sc=Bassa_Vah}/utf - \x{16ad0} + \x{16ad0} /^\p{Script=Bass}/utf - \x{16af5} + \x{16af5} # Character not in script /^\p{Bassa_Vah}/utf - \x{16af6} + \x{16af6} # Base script check /^\p{sc=Elbasan}/utf - \x{10500} + \x{10500} /^\p{Script=Elba}/utf - \x{10527} + \x{10527} # Character not in script /^\p{Elbasan}/utf - \x{10528} + \x{10528} # Base script check /^\p{sc=Pahawh_Hmong}/utf - \x{16b00} + \x{16b00} /^\p{Script=Hmng}/utf - \x{16b8f} + \x{16b8f} # Character not in script /^\p{Pahawh_Hmong}/utf - \x{16b90} + \x{16b90} # Base script check /^\p{sc=Mende_Kikakui}/utf - \x{1e800} + \x{1e800} /^\p{Script=Mend}/utf - \x{1e8d6} + \x{1e8d6} # Character not in script /^\p{Mende_Kikakui}/utf - \x{1e8d7} + \x{1e8d7} # Base script check /^\p{sc=Mro}/utf - \x{16a40} + \x{16a40} /^\p{Script=Mroo}/utf - \x{16a6f} + \x{16a6f} # Character not in script /^\p{Mro}/utf - \x{16a70} + \x{16a70} # Base script check /^\p{sc=Old_North_Arabian}/utf - \x{10a80} + \x{10a80} /^\p{Script=Narb}/utf - \x{10a9f} + \x{10a9f} # Character not in script /^\p{Old_North_Arabian}/utf - \x{10aa0} + \x{10aa0} # Base script check /^\p{sc=Nabataean}/utf - \x{10880} + \x{10880} /^\p{Script=Nbat}/utf - \x{108af} + \x{108af} # Character not in script /^\p{Nabataean}/utf - \x{108b0} + \x{108b0} # Base script check /^\p{sc=Palmyrene}/utf - \x{10860} + \x{10860} /^\p{Script=Palm}/utf - \x{1087f} + \x{1087f} # Character not in script /^\p{Palmyrene}/utf - \x{10880} + \x{10880} # Base script check /^\p{sc=Pau_Cin_Hau}/utf - \x{11ac0} + \x{11ac0} /^\p{Script=Pauc}/utf - \x{11af8} + \x{11af8} # Character not in script /^\p{Pau_Cin_Hau}/utf - \x{11af9} + \x{11af9} # Base script check /^\p{sc=Siddham}/utf - \x{11580} + \x{11580} /^\p{Script=Sidd}/utf - \x{115dd} + \x{115dd} # Character not in script /^\p{Siddham}/utf - \x{115de} + \x{115de} # Base script check /^\p{sc=Warang_Citi}/utf - \x{118a0} + \x{118a0} /^\p{Script=Wara}/utf - \x{118ff} + \x{118ff} # Character not in script /^\p{Warang_Citi}/utf - \x{11900} 
+ \x{11900} # Base script check /^\p{sc=Ahom}/utf - \x{11700} + \x{11700} /^\p{Script=Ahom}/utf - \x{11746} + \x{11746} # Character not in script /^\p{Ahom}/utf - \x{11747} + \x{11747} # Base script check /^\p{sc=Anatolian_Hieroglyphs}/utf - \x{14400} + \x{14400} /^\p{Script=Hluw}/utf - \x{14646} + \x{14646} # Character not in script /^\p{Anatolian_Hieroglyphs}/utf - \x{14647} + \x{14647} # Base script check /^\p{sc=Hatran}/utf - \x{108e0} + \x{108e0} /^\p{Script=Hatr}/utf - \x{108ff} + \x{108ff} # Character not in script /^\p{Hatran}/utf - \x{10900} + \x{10900} # Base script check /^\p{sc=Old_Hungarian}/utf - \x{10c80} + \x{10c80} /^\p{Script=Hung}/utf - \x{10cff} + \x{10cff} # Character not in script /^\p{Old_Hungarian}/utf - \x{10d00} + \x{10d00} # Base script check /^\p{sc=SignWriting}/utf - \x{1d800} + \x{1d800} /^\p{Script=Sgnw}/utf - \x{1daaf} + \x{1daaf} # Character not in script /^\p{SignWriting}/utf - \x{1dab0} + \x{1dab0} # Base script check /^\p{sc=Bhaiksuki}/utf - \x{11c00} + \x{11c00} /^\p{Script=Bhks}/utf - \x{11c6c} + \x{11c6c} # Character not in script /^\p{Bhaiksuki}/utf - \x{11c6d} + \x{11c6d} # Base script check /^\p{sc=Marchen}/utf - \x{11c70} + \x{11c70} /^\p{Script=Marc}/utf - \x{11cb6} + \x{11cb6} # Character not in script /^\p{Marchen}/utf - \x{11cb7} + \x{11cb7} # Base script check /^\p{sc=Newa}/utf - \x{11400} + \x{11400} /^\p{Script=Newa}/utf - \x{11461} + \x{11461} # Character not in script /^\p{Newa}/utf - \x{11462} + \x{11462} # Base script check /^\p{sc=Osage}/utf - \x{104b0} + \x{104b0} /^\p{Script=Osge}/utf - \x{104fb} + \x{104fb} # Character not in script /^\p{Osage}/utf - \x{104fc} + \x{104fc} # Base script check /^\p{sc=Tangut}/utf - \x{16fe0} + \x{16fe0} /^\p{Script=Tang}/utf - \x{18d08} + \x{18d08} # Character not in script /^\p{Tangut}/utf - \x{18d09} + \x{18d09} # Base script check /^\p{sc=Nushu}/utf - \x{16fe1} + \x{16fe1} /^\p{Script=Nshu}/utf - \x{1b2fb} + \x{1b2fb} # Character not in script /^\p{Nushu}/utf - \x{1b2fc} + 
\x{1b2fc} # Base script check /^\p{sc=Soyombo}/utf - \x{11a50} + \x{11a50} /^\p{Script=Soyo}/utf - \x{11aa2} + \x{11aa2} # Character not in script /^\p{Soyombo}/utf - \x{11aa3} + \x{11aa3} # Base script check /^\p{sc=Zanabazar_Square}/utf - \x{11a00} + \x{11a00} /^\p{Script=Zanb}/utf - \x{11a47} + \x{11a47} # Character not in script /^\p{Zanabazar_Square}/utf - \x{11a48} + \x{11a48} # Base script check /^\p{sc=Makasar}/utf - \x{11ee0} + \x{11ee0} /^\p{Script=Maka}/utf - \x{11ef8} + \x{11ef8} # Character not in script /^\p{Makasar}/utf - \x{11ef9} + \x{11ef9} # Base script check /^\p{sc=Medefaidrin}/utf - \x{16e40} + \x{16e40} /^\p{Script=Medf}/utf - \x{16e9a} + \x{16e9a} # Character not in script /^\p{Medefaidrin}/utf - \x{16e9b} + \x{16e9b} # Base script check /^\p{sc=Old_Sogdian}/utf - \x{10f00} + \x{10f00} /^\p{Script=Sogo}/utf - \x{10f27} + \x{10f27} # Character not in script /^\p{Old_Sogdian}/utf - \x{10f28} + \x{10f28} # Base script check /^\p{sc=Elymaic}/utf - \x{10fe0} + \x{10fe0} /^\p{Script=Elym}/utf - \x{10ff6} + \x{10ff6} # Character not in script /^\p{Elymaic}/utf - \x{10ff7} + \x{10ff7} # Base script check /^\p{sc=Nyiakeng_Puachue_Hmong}/utf - \x{1e100} + \x{1e100} /^\p{Script=Hmnp}/utf - \x{1e14f} + \x{1e14f} # Character not in script /^\p{Nyiakeng_Puachue_Hmong}/utf - \x{1e150} + \x{1e150} # Base script check /^\p{sc=Wancho}/utf - \x{1e2c0} + \x{1e2c0} /^\p{Script=Wcho}/utf - \x{1e2ff} + \x{1e2ff} # Character not in script /^\p{Wancho}/utf - \x{1e300} + \x{1e300} # Base script check /^\p{sc=Chorasmian}/utf - \x{10fb0} + \x{10fb0} /^\p{Script=Chrs}/utf - \x{10fcb} + \x{10fcb} # Character not in script /^\p{Chorasmian}/utf - \x{10fcc} + \x{10fcc} # Base script check /^\p{sc=Dives_Akuru}/utf - \x{11900} + \x{11900} /^\p{Script=Diak}/utf - \x{11959} + \x{11959} # Character not in script /^\p{Dives_Akuru}/utf - \x{1195a} + \x{1195a} # Base script check /^\p{sc=Khitan_Small_Script}/utf - \x{16fe4} + \x{16fe4} /^\p{Script=Kits}/utf - \x{18cd5} + \x{18cd5} 
# Character not in script /^\p{Khitan_Small_Script}/utf - \x{18cd6} + \x{18cd6} # Base script check /^\p{sc=Tangsa}/utf - \x{16a70} + \x{16a70} /^\p{Script=Tnsa}/utf - \x{16ac9} + \x{16ac9} # Character not in script /^\p{Tangsa}/utf - \x{16aca} + \x{16aca} # Base script check /^\p{sc=Toto}/utf - \x{1e290} + \x{1e290} /^\p{Script=Toto}/utf - \x{1e2ae} + \x{1e2ae} # Character not in script /^\p{Toto}/utf - \x{1e2af} + \x{1e2af} # Base script check /^\p{sc=Vithkuqi}/utf - \x{10570} + \x{10570} /^\p{Script=Vith}/utf - \x{105bc} + \x{105bc} # Character not in script /^\p{Vithkuqi}/utf - \x{105bd} + \x{105bd} # Base script check /^\p{sc=Kawi}/utf - \x{11f00} + \x{11f00} /^\p{Script=Kawi}/utf - \x{11f59} + \x{11f59} # Character not in script /^\p{Kawi}/utf - \x{11f5a} + \x{11f6a} # Base script check /^\p{sc=Nag_Mundari}/utf - \x{1e4d0} + \x{1e4d0} /^\p{Script=Nagm}/utf - \x{1e4f9} + \x{1e4f9} # Character not in script /^\p{Nag_Mundari}/utf - \x{1e4fa} + \x{1e4fa} # End of testinput26 diff --git a/testdata/testinput27 b/testdata/testinput27 new file mode 100644 index 0000000..495906c --- /dev/null +++ b/testdata/testinput27 @@ -0,0 +1,3251 @@ +# These tests were generated by maint/GenerateTest.py using PCRE2's UCP +# data, do not edit unless that data has changed and they are reflecting +# a previous version. 
+ +# Unicode Script Extension tests for version 16.0.0 + +#perltest + +# Base script check +/^\p{sc=Latin}/utf + A + +/^\p{Script=Latn}/utf + \x{1df2a} + +# Script extension check +/^\p{Latin}/utf + \x{b7} + +/^\p{scx=Latn}/utf + \x{a92e} + +# Script extension only character +/^\p{Latin}/utf + \x{b7} + +/^\p{sc=Latin}/utf + \x{b7} + +# Character not in script +/^\p{Latin}/utf + \x{1df2b} + +# Base script check +/^\p{sc=Greek}/utf + \x{370} + +/^\p{Script=Grek}/utf + \x{1d245} + +# Script extension check +/^\p{Greek}/utf + \x{b7} + +/^\p{Script_Extensions=Grek}/utf + \x{205d} + +# Script extension only character +/^\p{Greek}/utf + \x{b7} + +/^\p{sc=Greek}/utf + \x{b7} + +# Character not in script +/^\p{Greek}/utf + \x{1d246} + +# Base script check +/^\p{sc=Cyrillic}/utf + \x{400} + +/^\p{Script=Cyrl}/utf + \x{1e08f} + +# Script extension check +/^\p{Cyrillic}/utf + \x{2bc} + +/^\p{scx=Cyrl}/utf + \x{a66f} + +# Script extension only character +/^\p{Cyrillic}/utf + \x{2bc} + +/^\p{sc=Cyrillic}/utf + \x{2bc} + +# Character not in script +/^\p{Cyrillic}/utf + \x{1e090} + +# Base script check +/^\p{sc=Armenian}/utf + \x{531} + +/^\p{Script=Armn}/utf + \x{fb17} + +# Script extension check +/^\p{Armenian}/utf + \x{308} + +/^\p{Script_Extensions=Armn}/utf + \x{589} + +# Script extension only character +/^\p{Armenian}/utf + \x{308} + +/^\p{sc=Armenian}/utf + \x{308} + +# Character not in script +/^\p{Armenian}/utf + \x{fb18} + +# Base script check +/^\p{sc=Hebrew}/utf + \x{591} + +/^\p{Script=Hebr}/utf + \x{fb4f} + +# Script extension check +/^\p{Hebrew}/utf + \x{307} + +/^\p{scx=Hebr}/utf + \x{308} + +# Script extension only character +/^\p{Hebrew}/utf + \x{307} + +/^\p{sc=Hebrew}/utf + \x{307} + +# Character not in script +/^\p{Hebrew}/utf + \x{fb50} + +# Base script check +/^\p{sc=Arabic}/utf + \x{600} + +/^\p{Script=Arab}/utf + \x{1eef1} + +# Script extension check +/^\p{Arabic}/utf + \x{60c} + +/^\p{Script_Extensions=Arab}/utf + \x{102fb} + +# Script extension only 
character +/^\p{Arabic}/utf + \x{60c} + +/^\p{sc=Arabic}/utf + \x{60c} + +# Character not in script +/^\p{Arabic}/utf + \x{1eef2} + +# Base script check +/^\p{sc=Syriac}/utf + \x{700} + +/^\p{Script=Syrc}/utf + \x{86a} + +# Script extension check +/^\p{Syriac}/utf + \x{303} + +/^\p{scx=Syrc}/utf + \x{1dfa} + +# Script extension only character +/^\p{Syriac}/utf + \x{303} + +/^\p{sc=Syriac}/utf + \x{303} + +# Character not in script +/^\p{Syriac}/utf + \x{1dfb} + +# Base script check +/^\p{sc=Thaana}/utf + \x{780} + +/^\p{Script=Thaa}/utf + \x{7b1} + +# Script extension check +/^\p{Thaana}/utf + \x{60c} + +/^\p{Script_Extensions=Thaa}/utf + \x{fdfd} + +# Script extension only character +/^\p{Thaana}/utf + \x{60c} + +/^\p{sc=Thaana}/utf + \x{60c} + +# Character not in script +/^\p{Thaana}/utf + \x{fdfe} + +# Base script check +/^\p{sc=Devanagari}/utf + \x{900} + +/^\p{Script=Deva}/utf + \x{11b09} + +# Script extension check +/^\p{Devanagari}/utf + \x{2bc} + +/^\p{scx=Deva}/utf + \x{a8f3} + +# Script extension only character +/^\p{Devanagari}/utf + \x{2bc} + +/^\p{sc=Devanagari}/utf + \x{2bc} + +# Character not in script +/^\p{Devanagari}/utf + \x{11b0a} + +# Base script check +/^\p{sc=Bengali}/utf + \x{980} + +/^\p{Script=Beng}/utf + \x{9fe} + +# Script extension check +/^\p{Bengali}/utf + \x{2bc} + +/^\p{Script_Extensions=Beng}/utf + \x{a8f1} + +# Script extension only character +/^\p{Bengali}/utf + \x{2bc} + +/^\p{sc=Bengali}/utf + \x{2bc} + +# Character not in script +/^\p{Bengali}/utf + \x{a8f2} + +# Base script check +/^\p{sc=Gurmukhi}/utf + \x{a01} + +/^\p{Script=Guru}/utf + \x{a76} + +# Script extension check +/^\p{Gurmukhi}/utf + \x{951} + +/^\p{scx=Guru}/utf + \x{a839} + +# Script extension only character +/^\p{Gurmukhi}/utf + \x{951} + +/^\p{sc=Gurmukhi}/utf + \x{951} + +# Character not in script +/^\p{Gurmukhi}/utf + \x{a83a} + +# Base script check +/^\p{sc=Gujarati}/utf + \x{a81} + +/^\p{Script=Gujr}/utf + \x{aff} + +# Script extension check 
+/^\p{Gujarati}/utf + \x{951} + +/^\p{Script_Extensions=Gujr}/utf + \x{a839} + +# Script extension only character +/^\p{Gujarati}/utf + \x{951} + +/^\p{sc=Gujarati}/utf + \x{951} + +# Character not in script +/^\p{Gujarati}/utf + \x{a83a} + +# Base script check +/^\p{sc=Oriya}/utf + \x{b01} + +/^\p{Script=Orya}/utf + \x{b77} + +# Script extension check +/^\p{Oriya}/utf + \x{951} + +/^\p{scx=Orya}/utf + \x{1cf2} + +# Script extension only character +/^\p{Oriya}/utf + \x{951} + +/^\p{sc=Oriya}/utf + \x{951} + +# Character not in script +/^\p{Oriya}/utf + \x{1cf3} + +# Base script check +/^\p{sc=Tamil}/utf + \x{b82} + +/^\p{Script=Taml}/utf + \x{11fff} + +# Script extension check +/^\p{Tamil}/utf + \x{951} + +/^\p{Script_Extensions=Taml}/utf + \x{11fd3} + +# Script extension only character +/^\p{Tamil}/utf + \x{951} + +/^\p{sc=Tamil}/utf + \x{951} + +# Character not in script +/^\p{Tamil}/utf + \x{12000} + +# Base script check +/^\p{sc=Telugu}/utf + \x{c00} + +/^\p{Script=Telu}/utf + \x{c7f} + +# Script extension check +/^\p{Telugu}/utf + \x{951} + +/^\p{scx=Telu}/utf + \x{1cf2} + +# Script extension only character +/^\p{Telugu}/utf + \x{951} + +/^\p{sc=Telugu}/utf + \x{951} + +# Character not in script +/^\p{Telugu}/utf + \x{1cf3} + +# Base script check +/^\p{sc=Kannada}/utf + \x{c80} + +/^\p{Script=Knda}/utf + \x{cf3} + +# Script extension check +/^\p{Kannada}/utf + \x{951} + +/^\p{Script_Extensions=Knda}/utf + \x{a835} + +# Script extension only character +/^\p{Kannada}/utf + \x{951} + +/^\p{sc=Kannada}/utf + \x{951} + +# Character not in script +/^\p{Kannada}/utf + \x{a836} + +# Base script check +/^\p{sc=Malayalam}/utf + \x{d00} + +/^\p{Script=Mlym}/utf + \x{d7f} + +# Script extension check +/^\p{Malayalam}/utf + \x{951} + +/^\p{scx=Mlym}/utf + \x{a832} + +# Script extension only character +/^\p{Malayalam}/utf + \x{951} + +/^\p{sc=Malayalam}/utf + \x{951} + +# Character not in script +/^\p{Malayalam}/utf + \x{a833} + +# Base script check +/^\p{sc=Sinhala}/utf + 
\x{d81} + +/^\p{Script=Sinh}/utf + \x{111f4} + +# Script extension check +/^\p{Sinhala}/utf + \x{964} + +/^\p{Script_Extensions=Sinh}/utf + \x{1cf2} + +# Script extension only character +/^\p{Sinhala}/utf + \x{964} + +/^\p{sc=Sinhala}/utf + \x{964} + +# Character not in script +/^\p{Sinhala}/utf + \x{111f5} + +# Base script check +/^\p{sc=Thai}/utf + \x{e01} + +/^\p{Script=Thai}/utf + \x{e5b} + +# Script extension check +/^\p{Thai}/utf + \x{2bc} + +/^\p{scx=Thai}/utf + \x{331} + +# Script extension only character +/^\p{Thai}/utf + \x{2bc} + +/^\p{sc=Thai}/utf + \x{2bc} + +# Character not in script +/^\p{Thai}/utf + \x{e5c} + +# Base script check +/^\p{sc=Tibetan}/utf + \x{f00} + +/^\p{Script=Tibt}/utf + \x{fda} + +# Script extension check +/^\p{Tibetan}/utf + \x{3008} + +/^\p{Script_Extensions=Tibt}/utf + \x{300b} + +# Script extension only character +/^\p{Tibetan}/utf + \x{3008} + +/^\p{sc=Tibetan}/utf + \x{3008} + +# Character not in script +/^\p{Tibetan}/utf + \x{300c} + +# Base script check +/^\p{sc=Myanmar}/utf + \x{1000} + +/^\p{Script=Mymr}/utf + \x{116e3} + +# Script extension check +/^\p{Myanmar}/utf + \x{1040} + +/^\p{scx=Mymr}/utf + \x{a92e} + +# Script extension only character +/^\p{Myanmar}/utf + \x{a92e} + +/^\p{sc=Myanmar}/utf + \x{a92e} + +# Character not in script +/^\p{Myanmar}/utf + \x{116e4} + +# Base script check +/^\p{sc=Georgian}/utf + \x{10a0} + +/^\p{Script=Geor}/utf + \x{2d2d} + +# Script extension check +/^\p{Georgian}/utf + \x{b7} + +/^\p{Script_Extensions=Geor}/utf + \x{2e31} + +# Script extension only character +/^\p{Georgian}/utf + \x{b7} + +/^\p{sc=Georgian}/utf + \x{b7} + +# Character not in script +/^\p{Georgian}/utf + \x{2e32} + +# Base script check +/^\p{sc=Hangul}/utf + \x{1100} + +/^\p{Script=Hang}/utf + \x{ffdc} + +# Script extension check +/^\p{Hangul}/utf + \x{3001} + +/^\p{scx=Hang}/utf + \x{ff65} + +# Script extension only character +/^\p{Hangul}/utf + \x{3001} + +/^\p{sc=Hangul}/utf + \x{3001} + +# Character not in script 
+/^\p{Hangul}/utf + \x{ffdd} + +# Base script check +/^\p{sc=Ethiopic}/utf + \x{1200} + +/^\p{Script=Ethi}/utf + \x{1e7fe} + +# Script extension check +/^\p{Ethiopic}/utf + \x{30e} + +/^\p{Script_Extensions=Ethi}/utf + \x{30e} + +# Script extension only character +/^\p{Ethiopic}/utf + \x{30e} + +/^\p{sc=Ethiopic}/utf + \x{30e} + +# Character not in script +/^\p{Ethiopic}/utf + \x{1e7ff} + +# Base script check +/^\p{sc=Cherokee}/utf + \x{13a0} + +/^\p{Script=Cher}/utf + \x{abbf} + +# Script extension check +/^\p{Cherokee}/utf + \x{300} + +/^\p{scx=Cher}/utf + \x{331} + +# Script extension only character +/^\p{Cherokee}/utf + \x{300} + +/^\p{sc=Cherokee}/utf + \x{300} + +# Character not in script +/^\p{Cherokee}/utf + \x{abc0} + +# Base script check +/^\p{sc=Runic}/utf + \x{16a0} + +/^\p{Script=Runr}/utf + \x{16f8} + +# Script extension check +/^\p{Runic}/utf + \x{16eb} + +/^\p{Script_Extensions=Runr}/utf + \x{16ed} + +# Script extension only character +/^\p{Runic}/utf + \x{16eb} + +/^\p{sc=Runic}/utf + \x{16eb} + +# Character not in script +/^\p{Runic}/utf + \x{16f9} + +# Base script check +/^\p{sc=Mongolian}/utf + \x{1800} + +/^\p{Script=Mong}/utf + \x{1166c} + +# Script extension check +/^\p{Mongolian}/utf + \x{1802} + +/^\p{scx=Mong}/utf + \x{300b} + +# Script extension only character +/^\p{Mongolian}/utf + \x{1802} + +/^\p{sc=Mongolian}/utf + \x{1802} + +# Character not in script +/^\p{Mongolian}/utf + \x{1166d} + +# Base script check +/^\p{sc=Hiragana}/utf + \x{3041} + +/^\p{Script=Hira}/utf + \x{1f200} + +# Script extension check +/^\p{Hiragana}/utf + \x{3001} + +/^\p{Script_Extensions=Hira}/utf + \x{ff9f} + +# Script extension only character +/^\p{Hiragana}/utf + \x{3001} + +/^\p{sc=Hiragana}/utf + \x{3001} + +# Character not in script +/^\p{Hiragana}/utf + \x{1f201} + +# Base script check +/^\p{sc=Katakana}/utf + \x{30a1} + +/^\p{Script=Kana}/utf + \x{1b167} + +# Script extension check +/^\p{Katakana}/utf + \x{305} + +/^\p{scx=Kana}/utf + \x{ff9f} + +# 
Script extension only character +/^\p{Katakana}/utf + \x{305} + +/^\p{sc=Katakana}/utf + \x{305} + +# Character not in script +/^\p{Katakana}/utf + \x{1b168} + +# Base script check +/^\p{sc=Bopomofo}/utf + \x{2ea} + +/^\p{Script=Bopo}/utf + \x{31bf} + +# Script extension check +/^\p{Bopomofo}/utf + \x{2c7} + +/^\p{Script_Extensions=Bopo}/utf + \x{ff65} + +# Script extension only character +/^\p{Bopomofo}/utf + \x{2c7} + +/^\p{sc=Bopomofo}/utf + \x{2c7} + +# Character not in script +/^\p{Bopomofo}/utf + \x{ff66} + +# Base script check +/^\p{sc=Han}/utf + \x{2e80} + +/^\p{Script=Hani}/utf + \x{323af} + +# Script extension check +/^\p{Han}/utf + \x{b7} + +/^\p{scx=Hani}/utf + \x{1f251} + +# Script extension only character +/^\p{Han}/utf + \x{b7} + +/^\p{sc=Han}/utf + \x{b7} + +# Character not in script +/^\p{Han}/utf + \x{323b0} + +# Base script check +/^\p{sc=Yi}/utf + \x{a000} + +/^\p{Script=Yiii}/utf + \x{a4c6} + +# Script extension check +/^\p{Yi}/utf + \x{3001} + +/^\p{Script_Extensions=Yiii}/utf + \x{ff65} + +# Script extension only character +/^\p{Yi}/utf + \x{3001} + +/^\p{sc=Yi}/utf + \x{3001} + +# Character not in script +/^\p{Yi}/utf + \x{ff66} + +# Base script check +/^\p{sc=Gothic}/utf + \x{10330} + +/^\p{Script=Goth}/utf + \x{1034a} + +# Script extension check +/^\p{Gothic}/utf + \x{b7} + +/^\p{scx=Goth}/utf + \x{331} + +# Script extension only character +/^\p{Gothic}/utf + \x{b7} + +/^\p{sc=Gothic}/utf + \x{b7} + +# Character not in script +/^\p{Gothic}/utf + \x{1034b} + +# Base script check +/^\p{sc=Tagalog}/utf + \x{1700} + +/^\p{Script=Tglg}/utf + \x{171f} + +# Script extension check +/^\p{Tagalog}/utf + \x{1735} + +/^\p{Script_Extensions=Tglg}/utf + \x{1736} + +# Script extension only character +/^\p{Tagalog}/utf + \x{1735} + +/^\p{sc=Tagalog}/utf + \x{1735} + +# Character not in script +/^\p{Tagalog}/utf + \x{1737} + +# Base script check +/^\p{sc=Hanunoo}/utf + \x{1720} + +/^\p{Script=Hano}/utf + \x{1734} + +# Script extension check 
+/^\p{Hanunoo}/utf + \x{1735} + +/^\p{scx=Hano}/utf + \x{1736} + +# Script extension only character +/^\p{Hanunoo}/utf + \x{1735} + +/^\p{sc=Hanunoo}/utf + \x{1735} + +# Character not in script +/^\p{Hanunoo}/utf + \x{1737} + +# Base script check +/^\p{sc=Buhid}/utf + \x{1740} + +/^\p{Script=Buhd}/utf + \x{1753} + +# Script extension check +/^\p{Buhid}/utf + \x{1735} + +/^\p{Script_Extensions=Buhd}/utf + \x{1736} + +# Script extension only character +/^\p{Buhid}/utf + \x{1735} + +/^\p{sc=Buhid}/utf + \x{1735} + +# Character not in script +/^\p{Buhid}/utf + \x{1754} + +# Base script check +/^\p{sc=Tagbanwa}/utf + \x{1760} + +/^\p{Script=Tagb}/utf + \x{1773} + +# Script extension check +/^\p{Tagbanwa}/utf + \x{1735} + +/^\p{scx=Tagb}/utf + \x{1736} + +# Script extension only character +/^\p{Tagbanwa}/utf + \x{1735} + +/^\p{sc=Tagbanwa}/utf + \x{1735} + +# Character not in script +/^\p{Tagbanwa}/utf + \x{1774} + +# Base script check +/^\p{sc=Limbu}/utf + \x{1900} + +/^\p{Script=Limb}/utf + \x{194f} + +# Script extension check +/^\p{Limbu}/utf + \x{965} + +/^\p{Script_Extensions=Limb}/utf + \x{965} + +# Script extension only character +/^\p{Limbu}/utf + \x{965} + +/^\p{sc=Limbu}/utf + \x{965} + +# Character not in script +/^\p{Limbu}/utf + \x{1950} + +# Base script check +/^\p{sc=Tai_Le}/utf + \x{1950} + +/^\p{Script=Tale}/utf + \x{1974} + +# Script extension check +/^\p{Tai_Le}/utf + \x{300} + +/^\p{scx=Tale}/utf + \x{1049} + +# Script extension only character +/^\p{Tai_Le}/utf + \x{300} + +/^\p{sc=Tai_Le}/utf + \x{300} + +# Character not in script +/^\p{Tai_Le}/utf + \x{1975} + +# Base script check +/^\p{sc=Linear_B}/utf + \x{10000} + +/^\p{Script=Linb}/utf + \x{100fa} + +# Script extension check +/^\p{Linear_B}/utf + \x{10100} + +/^\p{Script_Extensions=Linb}/utf + \x{1013f} + +# Script extension only character +/^\p{Linear_B}/utf + \x{10100} + +/^\p{sc=Linear_B}/utf + \x{10100} + +# Character not in script +/^\p{Linear_B}/utf + \x{10140} + +# Base script check 
+/^\p{sc=Shavian}/utf + \x{10450} + +/^\p{Script=Shaw}/utf + \x{1047f} + +# Script extension check +/^\p{Shavian}/utf + \x{b7} + +/^\p{scx=Shaw}/utf + \x{b7} + +# Script extension only character +/^\p{Shavian}/utf + \x{b7} + +/^\p{sc=Shavian}/utf + \x{b7} + +# Character not in script +/^\p{Shavian}/utf + \x{10480} + +# Base script check +/^\p{sc=Cypriot}/utf + \x{10800} + +/^\p{Script=Cprt}/utf + \x{1083f} + +# Script extension check +/^\p{Cypriot}/utf + \x{10100} + +/^\p{Script_Extensions=Cprt}/utf + \x{1013f} + +# Script extension only character +/^\p{Cypriot}/utf + \x{10100} + +/^\p{sc=Cypriot}/utf + \x{10100} + +# Character not in script +/^\p{Cypriot}/utf + \x{10840} + +# Base script check +/^\p{sc=Buginese}/utf + \x{1a00} + +/^\p{Script=Bugi}/utf + \x{1a1f} + +# Script extension check +/^\p{Buginese}/utf + \x{a9cf} + +/^\p{scx=Bugi}/utf + \x{a9cf} + +# Script extension only character +/^\p{Buginese}/utf + \x{a9cf} + +/^\p{sc=Buginese}/utf + \x{a9cf} + +# Character not in script +/^\p{Buginese}/utf + \x{a9d0} + +# Base script check +/^\p{sc=Coptic}/utf + \x{3e2} + +/^\p{Script=Copt}/utf + \x{2cff} + +# Script extension check +/^\p{Coptic}/utf + \x{b7} + +/^\p{Script_Extensions=Copt}/utf + \x{102fb} + +# Script extension only character +/^\p{Coptic}/utf + \x{b7} + +/^\p{sc=Coptic}/utf + \x{b7} + +# Character not in script +/^\p{Coptic}/utf + \x{102fc} + +# Base script check +/^\p{sc=Glagolitic}/utf + \x{2c00} + +/^\p{Script=Glag}/utf + \x{1e02a} + +# Script extension check +/^\p{Glagolitic}/utf + \x{b7} + +/^\p{scx=Glag}/utf + \x{a66f} + +# Script extension only character +/^\p{Glagolitic}/utf + \x{b7} + +/^\p{sc=Glagolitic}/utf + \x{b7} + +# Character not in script +/^\p{Glagolitic}/utf + \x{1e02b} + +# Base script check +/^\p{sc=Tifinagh}/utf + \x{2d30} + +/^\p{Script=Tfng}/utf + \x{2d7f} + +# Script extension check +/^\p{Tifinagh}/utf + \x{302} + +/^\p{Script_Extensions=Tfng}/utf + \x{309} + +# Script extension only character +/^\p{Tifinagh}/utf + \x{302} + 
+/^\p{sc=Tifinagh}/utf + \x{302} + +# Character not in script +/^\p{Tifinagh}/utf + \x{2d80} + +# Base script check +/^\p{sc=Syloti_Nagri}/utf + \x{a800} + +/^\p{Script=Sylo}/utf + \x{a82c} + +# Script extension check +/^\p{Syloti_Nagri}/utf + \x{964} + +/^\p{scx=Sylo}/utf + \x{9ef} + +# Script extension only character +/^\p{Syloti_Nagri}/utf + \x{964} + +/^\p{sc=Syloti_Nagri}/utf + \x{964} + +# Character not in script +/^\p{Syloti_Nagri}/utf + \x{a82d} + +# Base script check +/^\p{sc=Phags_Pa}/utf + \x{a840} + +/^\p{Script=Phag}/utf + \x{a877} + +# Script extension check +/^\p{Phags_Pa}/utf + \x{1802} + +/^\p{Script_Extensions=Phag}/utf + \x{3002} + +# Script extension only character +/^\p{Phags_Pa}/utf + \x{1802} + +/^\p{sc=Phags_Pa}/utf + \x{1802} + +# Character not in script +/^\p{Phags_Pa}/utf + \x{a878} + +# Base script check +/^\p{sc=Nko}/utf + \x{7c0} + +/^\p{Script=Nkoo}/utf + \x{7ff} + +# Script extension check +/^\p{Nko}/utf + \x{60c} + +/^\p{scx=Nkoo}/utf + \x{fd3f} + +# Script extension only character +/^\p{Nko}/utf + \x{60c} + +/^\p{sc=Nko}/utf + \x{60c} + +# Character not in script +/^\p{Nko}/utf + \x{fd40} + +# Base script check +/^\p{sc=Kayah_Li}/utf + \x{a900} + +/^\p{Script=Kali}/utf + \x{a92f} + +# Script extension check +/^\p{Kayah_Li}/utf + \x{a92e} + +/^\p{Script_Extensions=Kali}/utf + \x{a92e} + +# Script extension only character +/^\p{Kayah_Li}/utf + \x{a92e} + +/^\p{sc=Kayah_Li}/utf + \x{a92e} + +# Character not in script +/^\p{Kayah_Li}/utf + \x{a930} + +# Base script check +/^\p{sc=Lycian}/utf + \x{10280} + +/^\p{Script=Lyci}/utf + \x{1029c} + +# Script extension check +/^\p{Lycian}/utf + \x{205a} + +/^\p{scx=Lyci}/utf + \x{205a} + +# Script extension only character +/^\p{Lycian}/utf + \x{205a} + +/^\p{sc=Lycian}/utf + \x{205a} + +# Character not in script +/^\p{Lycian}/utf + \x{1029d} + +# Base script check +/^\p{sc=Carian}/utf + \x{102a0} + +/^\p{Script=Cari}/utf + \x{102d0} + +# Script extension check +/^\p{Carian}/utf + \x{b7} + 
+/^\p{Script_Extensions=Cari}/utf + \x{2e31} + +# Script extension only character +/^\p{Carian}/utf + \x{b7} + +/^\p{sc=Carian}/utf + \x{b7} + +# Character not in script +/^\p{Carian}/utf + \x{102d1} + +# Base script check +/^\p{sc=Lydian}/utf + \x{10920} + +/^\p{Script=Lydi}/utf + \x{1093f} + +# Script extension check +/^\p{Lydian}/utf + \x{b7} + +/^\p{scx=Lydi}/utf + \x{2e31} + +# Script extension only character +/^\p{Lydian}/utf + \x{b7} + +/^\p{sc=Lydian}/utf + \x{b7} + +# Character not in script +/^\p{Lydian}/utf + \x{10940} + +# Base script check +/^\p{sc=Avestan}/utf + \x{10b00} + +/^\p{Script=Avst}/utf + \x{10b3f} + +# Script extension check +/^\p{Avestan}/utf + \x{b7} + +/^\p{Script_Extensions=Avst}/utf + \x{2e31} + +# Script extension only character +/^\p{Avestan}/utf + \x{b7} + +/^\p{sc=Avestan}/utf + \x{b7} + +# Character not in script +/^\p{Avestan}/utf + \x{10b40} + +# Base script check +/^\p{sc=Samaritan}/utf + \x{800} + +/^\p{Script=Samr}/utf + \x{83e} + +# Script extension check +/^\p{Samaritan}/utf + \x{2e31} + +/^\p{scx=Samr}/utf + \x{2e31} + +# Script extension only character +/^\p{Samaritan}/utf + \x{2e31} + +/^\p{sc=Samaritan}/utf + \x{2e31} + +# Character not in script +/^\p{Samaritan}/utf + \x{2e32} + +# Base script check +/^\p{sc=Lisu}/utf + \x{a4d0} + +/^\p{Script=Lisu}/utf + \x{11fb0} + +# Script extension check +/^\p{Lisu}/utf + \x{2bc} + +/^\p{Script_Extensions=Lisu}/utf + \x{300b} + +# Script extension only character +/^\p{Lisu}/utf + \x{2bc} + +/^\p{sc=Lisu}/utf + \x{2bc} + +# Character not in script +/^\p{Lisu}/utf + \x{11fb1} + +# Base script check +/^\p{sc=Javanese}/utf + \x{a980} + +/^\p{Script=Java}/utf + \x{a9df} + +# Script extension check +/^\p{Javanese}/utf + \x{a9cf} + +/^\p{scx=Java}/utf + \x{a9cf} + +# Script extension only character +/^\p{Javanese}/utf + \x{a9cf} + +/^\p{sc=Javanese}/utf + \x{a9cf} + +# Character not in script +/^\p{Javanese}/utf + \x{a9e0} + +# Base script check +/^\p{sc=Old_Turkic}/utf + \x{10c00} + 
+/^\p{Script=Orkh}/utf + \x{10c48} + +# Script extension check +/^\p{Old_Turkic}/utf + \x{205a} + +/^\p{Script_Extensions=Orkh}/utf + \x{2e30} + +# Script extension only character +/^\p{Old_Turkic}/utf + \x{205a} + +/^\p{sc=Old_Turkic}/utf + \x{205a} + +# Character not in script +/^\p{Old_Turkic}/utf + \x{10c49} + +# Base script check +/^\p{sc=Kaithi}/utf + \x{11080} + +/^\p{Script=Kthi}/utf + \x{110cd} + +# Script extension check +/^\p{Kaithi}/utf + \x{966} + +/^\p{scx=Kthi}/utf + \x{a839} + +# Script extension only character +/^\p{Kaithi}/utf + \x{966} + +/^\p{sc=Kaithi}/utf + \x{966} + +# Character not in script +/^\p{Kaithi}/utf + \x{110ce} + +# Base script check +/^\p{sc=Mandaic}/utf + \x{840} + +/^\p{Script=Mand}/utf + \x{85e} + +# Script extension check +/^\p{Mandaic}/utf + \x{640} + +/^\p{Script_Extensions=Mand}/utf + \x{640} + +# Script extension only character +/^\p{Mandaic}/utf + \x{640} + +/^\p{sc=Mandaic}/utf + \x{640} + +# Character not in script +/^\p{Mandaic}/utf + \x{85f} + +# Base script check +/^\p{sc=Chakma}/utf + \x{11100} + +/^\p{Script=Cakm}/utf + \x{11147} + +# Script extension check +/^\p{Chakma}/utf + \x{9e6} + +/^\p{scx=Cakm}/utf + \x{1049} + +# Script extension only character +/^\p{Chakma}/utf + \x{9e6} + +/^\p{sc=Chakma}/utf + \x{9e6} + +# Character not in script +/^\p{Chakma}/utf + \x{11148} + +# Base script check +/^\p{sc=Meroitic_Hieroglyphs}/utf + \x{10980} + +/^\p{Script=Mero}/utf + \x{1099f} + +# Script extension check +/^\p{Meroitic_Hieroglyphs}/utf + \x{205d} + +/^\p{Script_Extensions=Mero}/utf + \x{205d} + +# Script extension only character +/^\p{Meroitic_Hieroglyphs}/utf + \x{205d} + +/^\p{sc=Meroitic_Hieroglyphs}/utf + \x{205d} + +# Character not in script +/^\p{Meroitic_Hieroglyphs}/utf + \x{109a0} + +# Base script check +/^\p{sc=Sharada}/utf + \x{11180} + +/^\p{Script=Shrd}/utf + \x{111df} + +# Script extension check +/^\p{Sharada}/utf + \x{951} + +/^\p{scx=Shrd}/utf + \x{a838} + +# Script extension only character 
+/^\p{Sharada}/utf + \x{951} + +/^\p{sc=Sharada}/utf + \x{951} + +# Character not in script +/^\p{Sharada}/utf + \x{111e0} + +# Base script check +/^\p{sc=Takri}/utf + \x{11680} + +/^\p{Script=Takr}/utf + \x{116c9} + +# Script extension check +/^\p{Takri}/utf + \x{964} + +/^\p{Script_Extensions=Takr}/utf + \x{a839} + +# Script extension only character +/^\p{Takri}/utf + \x{964} + +/^\p{sc=Takri}/utf + \x{964} + +# Character not in script +/^\p{Takri}/utf + \x{116ca} + +# Base script check +/^\p{sc=Caucasian_Albanian}/utf + \x{10530} + +/^\p{Script=Aghb}/utf + \x{1056f} + +# Script extension check +/^\p{Caucasian_Albanian}/utf + \x{304} + +/^\p{scx=Aghb}/utf + \x{35e} + +# Script extension only character +/^\p{Caucasian_Albanian}/utf + \x{304} + +/^\p{sc=Caucasian_Albanian}/utf + \x{304} + +# Character not in script +/^\p{Caucasian_Albanian}/utf + \x{10570} + +# Base script check +/^\p{sc=Duployan}/utf + \x{1bc00} + +/^\p{Script=Dupl}/utf + \x{1bc9f} + +# Script extension check +/^\p{Duployan}/utf + \x{b7} + +/^\p{Script_Extensions=Dupl}/utf + \x{1bca3} + +# Script extension only character +/^\p{Duployan}/utf + \x{b7} + +/^\p{sc=Duployan}/utf + \x{b7} + +# Character not in script +/^\p{Duployan}/utf + \x{1bca4} + +# Base script check +/^\p{sc=Elbasan}/utf + \x{10500} + +/^\p{Script=Elba}/utf + \x{10527} + +# Script extension check +/^\p{Elbasan}/utf + \x{b7} + +/^\p{scx=Elba}/utf + \x{305} + +# Script extension only character +/^\p{Elbasan}/utf + \x{b7} + +/^\p{sc=Elbasan}/utf + \x{b7} + +# Character not in script +/^\p{Elbasan}/utf + \x{10528} + +# Base script check +/^\p{sc=Grantha}/utf + \x{11300} + +/^\p{Script=Gran}/utf + \x{11374} + +# Script extension check +/^\p{Grantha}/utf + \x{951} + +/^\p{Script_Extensions=Gran}/utf + \x{11fd3} + +# Script extension only character +/^\p{Grantha}/utf + \x{951} + +/^\p{sc=Grantha}/utf + \x{951} + +# Character not in script +/^\p{Grantha}/utf + \x{11fd4} + +# Base script check +/^\p{sc=Khojki}/utf + \x{11200} + 
+/^\p{Script=Khoj}/utf + \x{11241} + +# Script extension check +/^\p{Khojki}/utf + \x{ae6} + +/^\p{scx=Khoj}/utf + \x{a839} + +# Script extension only character +/^\p{Khojki}/utf + \x{ae6} + +/^\p{sc=Khojki}/utf + \x{ae6} + +# Character not in script +/^\p{Khojki}/utf + \x{11242} + +# Base script check +/^\p{sc=Linear_A}/utf + \x{10600} + +/^\p{Script=Lina}/utf + \x{10767} + +# Script extension check +/^\p{Linear_A}/utf + \x{10107} + +/^\p{Script_Extensions=Lina}/utf + \x{10133} + +# Script extension only character +/^\p{Linear_A}/utf + \x{10107} + +/^\p{sc=Linear_A}/utf + \x{10107} + +# Character not in script +/^\p{Linear_A}/utf + \x{10768} + +# Base script check +/^\p{sc=Mahajani}/utf + \x{11150} + +/^\p{Script=Mahj}/utf + \x{11176} + +# Script extension check +/^\p{Mahajani}/utf + \x{b7} + +/^\p{scx=Mahj}/utf + \x{a839} + +# Script extension only character +/^\p{Mahajani}/utf + \x{b7} + +/^\p{sc=Mahajani}/utf + \x{b7} + +# Character not in script +/^\p{Mahajani}/utf + \x{11177} + +# Base script check +/^\p{sc=Manichaean}/utf + \x{10ac0} + +/^\p{Script=Mani}/utf + \x{10af6} + +# Script extension check +/^\p{Manichaean}/utf + \x{640} + +/^\p{Script_Extensions=Mani}/utf + \x{10af2} + +# Script extension only character +/^\p{Manichaean}/utf + \x{640} + +/^\p{sc=Manichaean}/utf + \x{640} + +# Character not in script +/^\p{Manichaean}/utf + \x{10af7} + +# Base script check +/^\p{sc=Modi}/utf + \x{11600} + +/^\p{Script=Modi}/utf + \x{11659} + +# Script extension check +/^\p{Modi}/utf + \x{a830} + +/^\p{scx=Modi}/utf + \x{a839} + +# Script extension only character +/^\p{Modi}/utf + \x{a830} + +/^\p{sc=Modi}/utf + \x{a830} + +# Character not in script +/^\p{Modi}/utf + \x{1165a} + +# Base script check +/^\p{sc=Old_Permic}/utf + \x{10350} + +/^\p{Script=Perm}/utf + \x{1037a} + +# Script extension check +/^\p{Old_Permic}/utf + \x{b7} + +/^\p{Script_Extensions=Perm}/utf + \x{483} + +# Script extension only character +/^\p{Old_Permic}/utf + \x{b7} + +/^\p{sc=Old_Permic}/utf 
+ \x{b7} + +# Character not in script +/^\p{Old_Permic}/utf + \x{1037b} + +# Base script check +/^\p{sc=Psalter_Pahlavi}/utf + \x{10b80} + +/^\p{Script=Phlp}/utf + \x{10baf} + +# Script extension check +/^\p{Psalter_Pahlavi}/utf + \x{640} + +/^\p{scx=Phlp}/utf + \x{640} + +# Script extension only character +/^\p{Psalter_Pahlavi}/utf + \x{640} + +/^\p{sc=Psalter_Pahlavi}/utf + \x{640} + +# Character not in script +/^\p{Psalter_Pahlavi}/utf + \x{10bb0} + +# Base script check +/^\p{sc=Khudawadi}/utf + \x{112b0} + +/^\p{Script=Sind}/utf + \x{112f9} + +# Script extension check +/^\p{Khudawadi}/utf + \x{964} + +/^\p{Script_Extensions=Sind}/utf + \x{a839} + +# Script extension only character +/^\p{Khudawadi}/utf + \x{964} + +/^\p{sc=Khudawadi}/utf + \x{964} + +# Character not in script +/^\p{Khudawadi}/utf + \x{112fa} + +# Base script check +/^\p{sc=Tirhuta}/utf + \x{11480} + +/^\p{Script=Tirh}/utf + \x{114d9} + +# Script extension check +/^\p{Tirhuta}/utf + \x{951} + +/^\p{scx=Tirh}/utf + \x{a839} + +# Script extension only character +/^\p{Tirhuta}/utf + \x{951} + +/^\p{sc=Tirhuta}/utf + \x{951} + +# Character not in script +/^\p{Tirhuta}/utf + \x{114da} + +# Base script check +/^\p{sc=Multani}/utf + \x{11280} + +/^\p{Script=Mult}/utf + \x{112a9} + +# Script extension check +/^\p{Multani}/utf + \x{a66} + +/^\p{Script_Extensions=Mult}/utf + \x{a6f} + +# Script extension only character +/^\p{Multani}/utf + \x{a66} + +/^\p{sc=Multani}/utf + \x{a66} + +# Character not in script +/^\p{Multani}/utf + \x{112aa} + +# Base script check +/^\p{sc=Old_Hungarian}/utf + \x{10c80} + +/^\p{Script=Hung}/utf + \x{10cff} + +# Script extension check +/^\p{Old_Hungarian}/utf + \x{205a} + +/^\p{scx=Hung}/utf + \x{2e41} + +# Script extension only character +/^\p{Old_Hungarian}/utf + \x{205a} + +/^\p{sc=Old_Hungarian}/utf + \x{205a} + +# Character not in script +/^\p{Old_Hungarian}/utf + \x{10d00} + +# Base script check +/^\p{sc=Adlam}/utf + \x{1e900} + +/^\p{Script=Adlm}/utf + \x{1e95f} + +# 
Script extension check +/^\p{Adlam}/utf + \x{61f} + +/^\p{Script_Extensions=Adlm}/utf + \x{2e41} + +# Script extension only character +/^\p{Adlam}/utf + \x{61f} + +/^\p{sc=Adlam}/utf + \x{61f} + +# Character not in script +/^\p{Adlam}/utf + \x{1e960} + +# Base script check +/^\p{sc=Osage}/utf + \x{104b0} + +/^\p{Script=Osge}/utf + \x{104fb} + +# Script extension check +/^\p{Osage}/utf + \x{301} + +/^\p{scx=Osge}/utf + \x{358} + +# Script extension only character +/^\p{Osage}/utf + \x{301} + +/^\p{sc=Osage}/utf + \x{301} + +# Character not in script +/^\p{Osage}/utf + \x{104fc} + +# Base script check +/^\p{sc=Tangut}/utf + \x{16fe0} + +/^\p{Script=Tang}/utf + \x{18d08} + +# Script extension check +/^\p{Tangut}/utf + \x{2ff0} + +/^\p{Script_Extensions=Tang}/utf + \x{31ef} + +# Script extension only character +/^\p{Tangut}/utf + \x{2ff0} + +/^\p{sc=Tangut}/utf + \x{2ff0} + +# Character not in script +/^\p{Tangut}/utf + \x{18d09} + +# Base script check +/^\p{sc=Masaram_Gondi}/utf + \x{11d00} + +/^\p{Script=Gonm}/utf + \x{11d59} + +# Script extension check +/^\p{Masaram_Gondi}/utf + \x{964} + +/^\p{scx=Gonm}/utf + \x{965} + +# Script extension only character +/^\p{Masaram_Gondi}/utf + \x{964} + +/^\p{sc=Masaram_Gondi}/utf + \x{964} + +# Character not in script +/^\p{Masaram_Gondi}/utf + \x{11d5a} + +# Base script check +/^\p{sc=Dogra}/utf + \x{11800} + +/^\p{Script=Dogr}/utf + \x{1183b} + +# Script extension check +/^\p{Dogra}/utf + \x{964} + +/^\p{Script_Extensions=Dogr}/utf + \x{a839} + +# Script extension only character +/^\p{Dogra}/utf + \x{964} + +/^\p{sc=Dogra}/utf + \x{964} + +# Character not in script +/^\p{Dogra}/utf + \x{1183c} + +# Base script check +/^\p{sc=Gunjala_Gondi}/utf + \x{11d60} + +/^\p{Script=Gong}/utf + \x{11da9} + +# Script extension check +/^\p{Gunjala_Gondi}/utf + \x{b7} + +/^\p{scx=Gong}/utf + \x{965} + +# Script extension only character +/^\p{Gunjala_Gondi}/utf + \x{b7} + +/^\p{sc=Gunjala_Gondi}/utf + \x{b7} + +# Character not in script 
+/^\p{Gunjala_Gondi}/utf + \x{11daa} + +# Base script check +/^\p{sc=Hanifi_Rohingya}/utf + \x{10d00} + +/^\p{Script=Rohg}/utf + \x{10d39} + +# Script extension check +/^\p{Hanifi_Rohingya}/utf + \x{60c} + +/^\p{Script_Extensions=Rohg}/utf + \x{6d4} + +# Script extension only character +/^\p{Hanifi_Rohingya}/utf + \x{60c} + +/^\p{sc=Hanifi_Rohingya}/utf + \x{60c} + +# Character not in script +/^\p{Hanifi_Rohingya}/utf + \x{10d3a} + +# Base script check +/^\p{sc=Sogdian}/utf + \x{10f30} + +/^\p{Script=Sogd}/utf + \x{10f59} + +# Script extension check +/^\p{Sogdian}/utf + \x{640} + +/^\p{scx=Sogd}/utf + \x{640} + +# Script extension only character +/^\p{Sogdian}/utf + \x{640} + +/^\p{sc=Sogdian}/utf + \x{640} + +# Character not in script +/^\p{Sogdian}/utf + \x{10f5a} + +# Base script check +/^\p{sc=Nandinagari}/utf + \x{119a0} + +/^\p{Script=Nand}/utf + \x{119e4} + +# Script extension check +/^\p{Nandinagari}/utf + \x{964} + +/^\p{Script_Extensions=Nand}/utf + \x{a835} + +# Script extension only character +/^\p{Nandinagari}/utf + \x{964} + +/^\p{sc=Nandinagari}/utf + \x{964} + +# Character not in script +/^\p{Nandinagari}/utf + \x{119e5} + +# Base script check +/^\p{sc=Yezidi}/utf + \x{10e80} + +/^\p{Script=Yezi}/utf + \x{10eb1} + +# Script extension check +/^\p{Yezidi}/utf + \x{60c} + +/^\p{scx=Yezi}/utf + \x{669} + +# Script extension only character +/^\p{Yezidi}/utf + \x{60c} + +/^\p{sc=Yezidi}/utf + \x{60c} + +# Character not in script +/^\p{Yezidi}/utf + \x{10eb2} + +# Base script check +/^\p{sc=Cypro_Minoan}/utf + \x{12f90} + +/^\p{Script=Cpmn}/utf + \x{12ff2} + +# Script extension check +/^\p{Cypro_Minoan}/utf + \x{10100} + +/^\p{Script_Extensions=Cpmn}/utf + \x{10101} + +# Script extension only character +/^\p{Cypro_Minoan}/utf + \x{10100} + +/^\p{sc=Cypro_Minoan}/utf + \x{10100} + +# Character not in script +/^\p{Cypro_Minoan}/utf + \x{12ff3} + +# Base script check +/^\p{sc=Old_Uyghur}/utf + \x{10f70} + +/^\p{Script=Ougr}/utf + \x{10f89} + +# Script 
extension check +/^\p{Old_Uyghur}/utf + \x{640} + +/^\p{scx=Ougr}/utf + \x{10af2} + +# Script extension only character +/^\p{Old_Uyghur}/utf + \x{640} + +/^\p{sc=Old_Uyghur}/utf + \x{640} + +# Character not in script +/^\p{Old_Uyghur}/utf + \x{10f8a} + +# Base script check +/^\p{sc=Toto}/utf + \x{1e290} + +/^\p{Script=Toto}/utf + \x{1e2ae} + +# Script extension check +/^\p{Toto}/utf + \x{2bc} + +/^\p{Script_Extensions=Toto}/utf + \x{2bc} + +# Script extension only character +/^\p{Toto}/utf + \x{2bc} + +/^\p{sc=Toto}/utf + \x{2bc} + +# Character not in script +/^\p{Toto}/utf + \x{1e2af} + +# Base script check +/^\p{sc=Garay}/utf + \x{10d40} + +/^\p{Script=Gara}/utf + \x{10d8f} + +# Script extension check +/^\p{Garay}/utf + \x{60c} + +/^\p{scx=Gara}/utf + \x{61f} + +# Script extension only character +/^\p{Garay}/utf + \x{60c} + +/^\p{sc=Garay}/utf + \x{60c} + +# Character not in script +/^\p{Garay}/utf + \x{10d90} + +# Base script check +/^\p{sc=Gurung_Khema}/utf + \x{16100} + +/^\p{Script=Gukh}/utf + \x{16139} + +# Script extension check +/^\p{Gurung_Khema}/utf + \x{965} + +/^\p{Script_Extensions=Gukh}/utf + \x{965} + +# Script extension only character +/^\p{Gurung_Khema}/utf + \x{965} + +/^\p{sc=Gurung_Khema}/utf + \x{965} + +# Character not in script +/^\p{Gurung_Khema}/utf + \x{1613a} + +# Base script check +/^\p{sc=Ol_Onal}/utf + \x{1e5d0} + +/^\p{Script=Onao}/utf + \x{1e5ff} + +# Script extension check +/^\p{Ol_Onal}/utf + \x{964} + +/^\p{scx=Onao}/utf + \x{965} + +# Script extension only character +/^\p{Ol_Onal}/utf + \x{964} + +/^\p{sc=Ol_Onal}/utf + \x{964} + +# Character not in script +/^\p{Ol_Onal}/utf + \x{1e600} + +# Base script check +/^\p{sc=Sunuwar}/utf + \x{11bc0} + +/^\p{Script=Sunu}/utf + \x{11bf9} + +# Script extension check +/^\p{Sunuwar}/utf + \x{300} + +/^\p{Script_Extensions=Sunu}/utf + \x{331} + +# Script extension only character +/^\p{Sunuwar}/utf + \x{300} + +/^\p{sc=Sunuwar}/utf + \x{300} + +# Character not in script +/^\p{Sunuwar}/utf + 
\x{11bfa} + +# Base script check +/^\p{sc=Todhri}/utf + \x{105c0} + +/^\p{Script=Todr}/utf + \x{105f3} + +# Script extension check +/^\p{Todhri}/utf + \x{301} + +/^\p{scx=Todr}/utf + \x{35e} + +# Script extension only character +/^\p{Todhri}/utf + \x{301} + +/^\p{sc=Todhri}/utf + \x{301} + +# Character not in script +/^\p{Todhri}/utf + \x{105f4} + +# Base script check +/^\p{sc=Tulu_Tigalari}/utf + \x{11380} + +/^\p{Script=Tutg}/utf + \x{113e2} + +# Script extension check +/^\p{Tulu_Tigalari}/utf + \x{ce6} + +/^\p{Script_Extensions=Tutg}/utf + \x{a8f1} + +# Script extension only character +/^\p{Tulu_Tigalari}/utf + \x{ce6} + +/^\p{sc=Tulu_Tigalari}/utf + \x{ce6} + +# Character not in script +/^\p{Tulu_Tigalari}/utf + \x{113e3} + +# Base script check +/^\p{sc=Common}/utf + \x{00} + +/^\p{Script=Zyyy}/utf + \x{e007f} + +# Character not in script +/^\p{Common}/utf + \x{e0080} + +# Base script check +/^\p{sc=Lao}/utf + \x{e81} + +/^\p{Script=Laoo}/utf + \x{edf} + +# Character not in script +/^\p{Lao}/utf + \x{ee0} + +# Base script check +/^\p{sc=Canadian_Aboriginal}/utf + \x{1400} + +/^\p{Script=Cans}/utf + \x{11abf} + +# Character not in script +/^\p{Canadian_Aboriginal}/utf + \x{11ac0} + +# Base script check +/^\p{sc=Ogham}/utf + \x{1680} + +/^\p{Script=Ogam}/utf + \x{169c} + +# Character not in script +/^\p{Ogham}/utf + \x{169d} + +# Base script check +/^\p{sc=Khmer}/utf + \x{1780} + +/^\p{Script=Khmr}/utf + \x{19ff} + +# Character not in script +/^\p{Khmer}/utf + \x{1a00} + +# Base script check +/^\p{sc=Old_Italic}/utf + \x{10300} + +/^\p{Script=Ital}/utf + \x{1032f} + +# Character not in script +/^\p{Old_Italic}/utf + \x{10330} + +# Base script check +/^\p{sc=Deseret}/utf + \x{10400} + +/^\p{Script=Dsrt}/utf + \x{1044f} + +# Character not in script +/^\p{Deseret}/utf + \x{10450} + +# Base script check +/^\p{sc=Inherited}/utf + \x{300} + +/^\p{Script=Zinh}/utf + \x{e01ef} + +# Character not in script +/^\p{Inherited}/utf + \x{e01f0} + +# Base script check 
+/^\p{sc=Ugaritic}/utf + \x{10380} + +/^\p{Script=Ugar}/utf + \x{1039f} + +# Character not in script +/^\p{Ugaritic}/utf + \x{103a0} + +# Base script check +/^\p{sc=Osmanya}/utf + \x{10480} + +/^\p{Script=Osma}/utf + \x{104a9} + +# Character not in script +/^\p{Osmanya}/utf + \x{104aa} + +# Base script check +/^\p{sc=Braille}/utf + \x{2800} + +/^\p{Script=Brai}/utf + \x{28ff} + +# Character not in script +/^\p{Braille}/utf + \x{2900} + +# Base script check +/^\p{sc=New_Tai_Lue}/utf + \x{1980} + +/^\p{Script=Talu}/utf + \x{19df} + +# Character not in script +/^\p{New_Tai_Lue}/utf + \x{19e0} + +# Base script check +/^\p{sc=Old_Persian}/utf + \x{103a0} + +/^\p{Script=Xpeo}/utf + \x{103d5} + +# Character not in script +/^\p{Old_Persian}/utf + \x{103d6} + +# Base script check +/^\p{sc=Kharoshthi}/utf + \x{10a00} + +/^\p{Script=Khar}/utf + \x{10a58} + +# Character not in script +/^\p{Kharoshthi}/utf + \x{10a59} + +# Base script check +/^\p{sc=Balinese}/utf + \x{1b00} + +/^\p{Script=Bali}/utf + \x{1b7f} + +# Character not in script +/^\p{Balinese}/utf + \x{1b80} + +# Base script check +/^\p{sc=Cuneiform}/utf + \x{12000} + +/^\p{Script=Xsux}/utf + \x{12543} + +# Character not in script +/^\p{Cuneiform}/utf + \x{12544} + +# Base script check +/^\p{sc=Phoenician}/utf + \x{10900} + +/^\p{Script=Phnx}/utf + \x{1091f} + +# Character not in script +/^\p{Phoenician}/utf + \x{10920} + +# Base script check +/^\p{sc=Sundanese}/utf + \x{1b80} + +/^\p{Script=Sund}/utf + \x{1cc7} + +# Character not in script +/^\p{Sundanese}/utf + \x{1cc8} + +# Base script check +/^\p{sc=Lepcha}/utf + \x{1c00} + +/^\p{Script=Lepc}/utf + \x{1c4f} + +# Character not in script +/^\p{Lepcha}/utf + \x{1c50} + +# Base script check +/^\p{sc=Ol_Chiki}/utf + \x{1c50} + +/^\p{Script=Olck}/utf + \x{1c7f} + +# Character not in script +/^\p{Ol_Chiki}/utf + \x{1c80} + +# Base script check +/^\p{sc=Vai}/utf + \x{a500} + +/^\p{Script=Vaii}/utf + \x{a62b} + +# Character not in script +/^\p{Vai}/utf + \x{a62c} + +# Base 
script check +/^\p{sc=Saurashtra}/utf + \x{a880} + +/^\p{Script=Saur}/utf + \x{a8d9} + +# Character not in script +/^\p{Saurashtra}/utf + \x{a8da} + +# Base script check +/^\p{sc=Rejang}/utf + \x{a930} + +/^\p{Script=Rjng}/utf + \x{a95f} + +# Character not in script +/^\p{Rejang}/utf + \x{a960} + +# Base script check +/^\p{sc=Cham}/utf + \x{aa00} + +/^\p{Script=Cham}/utf + \x{aa5f} + +# Character not in script +/^\p{Cham}/utf + \x{aa60} + +# Base script check +/^\p{sc=Tai_Tham}/utf + \x{1a20} + +/^\p{Script=Lana}/utf + \x{1aad} + +# Character not in script +/^\p{Tai_Tham}/utf + \x{1aae} + +# Base script check +/^\p{sc=Tai_Viet}/utf + \x{aa80} + +/^\p{Script=Tavt}/utf + \x{aadf} + +# Character not in script +/^\p{Tai_Viet}/utf + \x{aae0} + +# Base script check +/^\p{sc=Egyptian_Hieroglyphs}/utf + \x{13000} + +/^\p{Script=Egyp}/utf + \x{143fa} + +# Character not in script +/^\p{Egyptian_Hieroglyphs}/utf + \x{143fb} + +# Base script check +/^\p{sc=Bamum}/utf + \x{a6a0} + +/^\p{Script=Bamu}/utf + \x{16a38} + +# Character not in script +/^\p{Bamum}/utf + \x{16a39} + +# Base script check +/^\p{sc=Meetei_Mayek}/utf + \x{aae0} + +/^\p{Script=Mtei}/utf + \x{abf9} + +# Character not in script +/^\p{Meetei_Mayek}/utf + \x{abfa} + +# Base script check +/^\p{sc=Imperial_Aramaic}/utf + \x{10840} + +/^\p{Script=Armi}/utf + \x{1085f} + +# Character not in script +/^\p{Imperial_Aramaic}/utf + \x{10860} + +# Base script check +/^\p{sc=Old_South_Arabian}/utf + \x{10a60} + +/^\p{Script=Sarb}/utf + \x{10a7f} + +# Character not in script +/^\p{Old_South_Arabian}/utf + \x{10a80} + +# Base script check +/^\p{sc=Inscriptional_Parthian}/utf + \x{10b40} + +/^\p{Script=Prti}/utf + \x{10b5f} + +# Character not in script +/^\p{Inscriptional_Parthian}/utf + \x{10b60} + +# Base script check +/^\p{sc=Inscriptional_Pahlavi}/utf + \x{10b60} + +/^\p{Script=Phli}/utf + \x{10b7f} + +# Character not in script +/^\p{Inscriptional_Pahlavi}/utf + \x{10b80} + +# Base script check +/^\p{sc=Batak}/utf + 
\x{1bc0} + +/^\p{Script=Batk}/utf + \x{1bff} + +# Character not in script +/^\p{Batak}/utf + \x{1c00} + +# Base script check +/^\p{sc=Brahmi}/utf + \x{11000} + +/^\p{Script=Brah}/utf + \x{1107f} + +# Character not in script +/^\p{Brahmi}/utf + \x{11080} + +# Base script check +/^\p{sc=Meroitic_Cursive}/utf + \x{109a0} + +/^\p{Script=Merc}/utf + \x{109ff} + +# Character not in script +/^\p{Meroitic_Cursive}/utf + \x{10a00} + +# Base script check +/^\p{sc=Miao}/utf + \x{16f00} + +/^\p{Script=Plrd}/utf + \x{16f9f} + +# Character not in script +/^\p{Miao}/utf + \x{16fa0} + +# Base script check +/^\p{sc=Sora_Sompeng}/utf + \x{110d0} + +/^\p{Script=Sora}/utf + \x{110f9} + +# Character not in script +/^\p{Sora_Sompeng}/utf + \x{110fa} + +# Base script check +/^\p{sc=Bassa_Vah}/utf + \x{16ad0} + +/^\p{Script=Bass}/utf + \x{16af5} + +# Character not in script +/^\p{Bassa_Vah}/utf + \x{16af6} + +# Base script check +/^\p{sc=Pahawh_Hmong}/utf + \x{16b00} + +/^\p{Script=Hmng}/utf + \x{16b8f} + +# Character not in script +/^\p{Pahawh_Hmong}/utf + \x{16b90} + +# Base script check +/^\p{sc=Mende_Kikakui}/utf + \x{1e800} + +/^\p{Script=Mend}/utf + \x{1e8d6} + +# Character not in script +/^\p{Mende_Kikakui}/utf + \x{1e8d7} + +# Base script check +/^\p{sc=Mro}/utf + \x{16a40} + +/^\p{Script=Mroo}/utf + \x{16a6f} + +# Character not in script +/^\p{Mro}/utf + \x{16a70} + +# Base script check +/^\p{sc=Old_North_Arabian}/utf + \x{10a80} + +/^\p{Script=Narb}/utf + \x{10a9f} + +# Character not in script +/^\p{Old_North_Arabian}/utf + \x{10aa0} + +# Base script check +/^\p{sc=Nabataean}/utf + \x{10880} + +/^\p{Script=Nbat}/utf + \x{108af} + +# Character not in script +/^\p{Nabataean}/utf + \x{108b0} + +# Base script check +/^\p{sc=Palmyrene}/utf + \x{10860} + +/^\p{Script=Palm}/utf + \x{1087f} + +# Character not in script +/^\p{Palmyrene}/utf + \x{10880} + +# Base script check +/^\p{sc=Pau_Cin_Hau}/utf + \x{11ac0} + +/^\p{Script=Pauc}/utf + \x{11af8} + +# Character not in script 
+/^\p{Pau_Cin_Hau}/utf + \x{11af9} + +# Base script check +/^\p{sc=Siddham}/utf + \x{11580} + +/^\p{Script=Sidd}/utf + \x{115dd} + +# Character not in script +/^\p{Siddham}/utf + \x{115de} + +# Base script check +/^\p{sc=Warang_Citi}/utf + \x{118a0} + +/^\p{Script=Wara}/utf + \x{118ff} + +# Character not in script +/^\p{Warang_Citi}/utf + \x{11900} + +# Base script check +/^\p{sc=Ahom}/utf + \x{11700} + +/^\p{Script=Ahom}/utf + \x{11746} + +# Character not in script +/^\p{Ahom}/utf + \x{11747} + +# Base script check +/^\p{sc=Anatolian_Hieroglyphs}/utf + \x{14400} + +/^\p{Script=Hluw}/utf + \x{14646} + +# Character not in script +/^\p{Anatolian_Hieroglyphs}/utf + \x{14647} + +# Base script check +/^\p{sc=Hatran}/utf + \x{108e0} + +/^\p{Script=Hatr}/utf + \x{108ff} + +# Character not in script +/^\p{Hatran}/utf + \x{10900} + +# Base script check +/^\p{sc=SignWriting}/utf + \x{1d800} + +/^\p{Script=Sgnw}/utf + \x{1daaf} + +# Character not in script +/^\p{SignWriting}/utf + \x{1dab0} + +# Base script check +/^\p{sc=Bhaiksuki}/utf + \x{11c00} + +/^\p{Script=Bhks}/utf + \x{11c6c} + +# Character not in script +/^\p{Bhaiksuki}/utf + \x{11c6d} + +# Base script check +/^\p{sc=Marchen}/utf + \x{11c70} + +/^\p{Script=Marc}/utf + \x{11cb6} + +# Character not in script +/^\p{Marchen}/utf + \x{11cb7} + +# Base script check +/^\p{sc=Newa}/utf + \x{11400} + +/^\p{Script=Newa}/utf + \x{11461} + +# Character not in script +/^\p{Newa}/utf + \x{11462} + +# Base script check +/^\p{sc=Nushu}/utf + \x{16fe1} + +/^\p{Script=Nshu}/utf + \x{1b2fb} + +# Character not in script +/^\p{Nushu}/utf + \x{1b2fc} + +# Base script check +/^\p{sc=Soyombo}/utf + \x{11a50} + +/^\p{Script=Soyo}/utf + \x{11aa2} + +# Character not in script +/^\p{Soyombo}/utf + \x{11aa3} + +# Base script check +/^\p{sc=Zanabazar_Square}/utf + \x{11a00} + +/^\p{Script=Zanb}/utf + \x{11a47} + +# Character not in script +/^\p{Zanabazar_Square}/utf + \x{11a48} + +# Base script check +/^\p{sc=Makasar}/utf + \x{11ee0} + 
+/^\p{Script=Maka}/utf + \x{11ef8} + +# Character not in script +/^\p{Makasar}/utf + \x{11ef9} + +# Base script check +/^\p{sc=Medefaidrin}/utf + \x{16e40} + +/^\p{Script=Medf}/utf + \x{16e9a} + +# Character not in script +/^\p{Medefaidrin}/utf + \x{16e9b} + +# Base script check +/^\p{sc=Old_Sogdian}/utf + \x{10f00} + +/^\p{Script=Sogo}/utf + \x{10f27} + +# Character not in script +/^\p{Old_Sogdian}/utf + \x{10f28} + +# Base script check +/^\p{sc=Elymaic}/utf + \x{10fe0} + +/^\p{Script=Elym}/utf + \x{10ff6} + +# Character not in script +/^\p{Elymaic}/utf + \x{10ff7} + +# Base script check +/^\p{sc=Nyiakeng_Puachue_Hmong}/utf + \x{1e100} + +/^\p{Script=Hmnp}/utf + \x{1e14f} + +# Character not in script +/^\p{Nyiakeng_Puachue_Hmong}/utf + \x{1e150} + +# Base script check +/^\p{sc=Wancho}/utf + \x{1e2c0} + +/^\p{Script=Wcho}/utf + \x{1e2ff} + +# Character not in script +/^\p{Wancho}/utf + \x{1e300} + +# Base script check +/^\p{sc=Chorasmian}/utf + \x{10fb0} + +/^\p{Script=Chrs}/utf + \x{10fcb} + +# Character not in script +/^\p{Chorasmian}/utf + \x{10fcc} + +# Base script check +/^\p{sc=Dives_Akuru}/utf + \x{11900} + +/^\p{Script=Diak}/utf + \x{11959} + +# Character not in script +/^\p{Dives_Akuru}/utf + \x{1195a} + +# Base script check +/^\p{sc=Khitan_Small_Script}/utf + \x{16fe4} + +/^\p{Script=Kits}/utf + \x{18cff} + +# Character not in script +/^\p{Khitan_Small_Script}/utf + \x{18d00} + +# Base script check +/^\p{sc=Tangsa}/utf + \x{16a70} + +/^\p{Script=Tnsa}/utf + \x{16ac9} + +# Character not in script +/^\p{Tangsa}/utf + \x{16aca} + +# Base script check +/^\p{sc=Vithkuqi}/utf + \x{10570} + +/^\p{Script=Vith}/utf + \x{105bc} + +# Character not in script +/^\p{Vithkuqi}/utf + \x{105bd} + +# Base script check +/^\p{sc=Kawi}/utf + \x{11f00} + +/^\p{Script=Kawi}/utf + \x{11f5a} + +# Character not in script +/^\p{Kawi}/utf + \x{11f5b} + +# Base script check +/^\p{sc=Nag_Mundari}/utf + \x{1e4d0} + +/^\p{Script=Nagm}/utf + \x{1e4f9} + +# Character not in script 
+/^\p{Nag_Mundari}/utf + \x{1e4fa} + +# Base script check +/^\p{sc=Kirat_Rai}/utf + \x{16d40} + +/^\p{Script=Krai}/utf + \x{16d79} + +# Character not in script +/^\p{Kirat_Rai}/utf + \x{16d7a} + +# End of test diff --git a/testdata/testinput3 b/testdata/testinput3 index 20f8d4c..59337f9 100644 --- a/testdata/testinput3 +++ b/testdata/testinput3 @@ -1,5 +1,5 @@ # This set of tests checks local-specific features, using the "fr_FR" locale. -# It is not Perl-compatible. When run via RunTest, the locale is edited to +# It is almost Perl-compatible. When run via RunTest, the locale is edited to # be whichever of "fr_FR", "french", or "fr" is found to exist. There is # different version of this file called wintestinput3 for use on Windows, # where the locale is called "french" and the tests are run using @@ -14,10 +14,6 @@ /^[\w]+/locale=fr_FR École -/^[\w]+/ -\= Expect no match - École - /^[\W]+/ École @@ -80,6 +76,14 @@ \= Expect no match \x9c +/ÿ/i + \xff +\= Expect no match + y + +/(.)\1/i + \xfe\xde + /\W+/ >>>\xaa<<< >>>\xba<<< diff --git a/testdata/testinput4 b/testdata/testinput4 index 2205caf..2eb5f54 100644 --- a/testdata/testinput4 +++ b/testdata/testinput4 @@ -998,6 +998,13 @@ \= Expect no match \x{660}\x{661}\x{662}ABC +/^\pN{3,}+(.)/utf + \x{7c0}8\x{662}\x{966}\x{95c} + \x{7c0}8\x{662}\x{95c} +\= Expect no match + \x{7c0}8\x{662}\x{966} + \x{7c0}8\x{95c} + /(?<=A\p{Nd})XYZ/utf A2XYZ 123A5XYZPQR @@ -1127,6 +1134,14 @@ A\x{300}\x{301}B\x{300}C\x{300}\x{301}X A\x{300}\x{301}B\x{300}C\x{300}\x{301}DA\x{300}X +/^\X{3,}+/utf + A\x{300}B\x{301}U\x{303}\x{0301} + A\x{300}B\x{301}U\x{303}\x{0301}X +\= Expect no match + A\x{300} + A\x{300}B\x{301} + A\x{300}U\x{303}\x{0301} + /^\X/utf A A\x{300}BC @@ -1841,6 +1856,28 @@ /[z\x{017f}]+/i,utf \x{0053}\x{0073}\x{017f} +/^[a-z\x{500}-\x{1000}]{3,}[a-h]|x/utf + ab\x{600}ijklmh + ab\x{600}hijklm +\= Expect no match + ab\x{600}ijklm + +/^[a-z\x{500}-\x{1000}]{4,7}[a-h]|x/utf + ab\x{600}\x{700}ijkh + ab\x{600}\x{700}hijkl +\= 
Expect no match + ab\x{600}\x{700}ijklh + ab\x{600}h\x{700}ijklmh + +/([a-z\x{1000}\x{2000}]{1,2}?u)+$/utf + \x{1000}uu\x{2000}u + \x{1001}uuuu + \x{2001}uuuuu + uuuu\x{1fff}#u#\x{2000}\x{1000}u\x{2000}u +\= Expect no match + abuabuabuabu! + uuuuuuuuuuuu# + # -------------------------------------- /(ΣΆΜΟΣ) \1/i,utf @@ -2335,6 +2372,9 @@ /[\N{U+1234}]/utf \x{1234} +/(\x{1234}) \1/utf + \N{U+1234} \o{11064} + # Test the full list of Unicode "Pattern White Space" characters that are to # be ignored by /x. The pattern lines below may show up oddly in text editors # or when listed to the screen. Note that characters such as U+2002, which are @@ -2871,4 +2911,209 @@ /caf\B.+?\B/utf,ucp --cafe\x{300}_au\x{203f}lait! +# -------------------------------------------------------------------------- +# Case-independent matching property tests added after changing PCRE2 to be +# compatible with Perl. All three cases (upper, lower, title) conflate. + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/utf + >AbbD< + >Abb\x{01c5}< +\= Expect no match + >aBBd< + >aB!!< + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf + >aB!!< + >\x{01c5}B!!< +\= Expect no match + >AbbD< + >aBBd< + >Abb\x{01c5}< + +/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf + >aB!!< +\= Expect no match + >AbbD< + >aBBd< + >Abb\x{01c5}< + +/[\p{Lt}\x{36b}][\P{Lt}\x{10a0}]/i,utf + >A!< + >\x{3c9}\x{58d}< + >\x{413}\x{940}< +\= Expect no match + \x{3c9}\x{3c9} + \x{58d}\x{58d} + \x{413}\x{413} + \x{940}\x{940} + +/^\p{Lt}+/i,utf + \x{1c5}AB + +# -------------------------------------------------------------------------- + +/\p{ ^ L u }/ + AbCd + +# hex + +/c3 b1/hex,utf + \N{U+00F1} + +/[^\P{Lu}1]/i,utf,ucp + a + A + \x{3a3} + \x{3c3} +\= Expect no match + 1 + 2 + +/[^\P{Lu}1]/utf,ucp + A + \x{3a3} +\= Expect no match + 1 + 2 + a + \x{3c3} + +/[\P{Lu}1]/i,utf,ucp + 1 + 2 +\= Expect no match + a + A + \x{3a3} + \x{3c3} + +/[\P{Lu}1]/utf,ucp + 1 + 2 + a + \x{3c3} +\= Expect no match + A + \x{3a3} + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + 
+/(?[\p{L} - \p{Lu}])/ + a +\= Expect no match + A + 1 + +/(?[\p{L} & \p{Lu}])/ + A +\= Expect no match + a + 1 + +/(?[[\p{Lu}z] ^ [\p{Ll}G]])/ + A + p +\= Expect no match + G + z + 1 + +/(?[\p{Ll} | \p{Nd}])/ + a + 1 +\= Expect no match + A + +/(?[\p{Ll} + [\p{Nd}]])/ + a + 1 +\= Expect no match + A + +/(?[ ![\p{Nd}z] ])/ + _ + Z +\= Expect no match + 1 + z + +/(?[ \P{Nd} + [2] ])/ + _ + Z + 2 +\= Expect no match + 1 + 3 + +/(?[ ![\P{Nd}] ])/ + 1 + 2 +\= Expect no match + _ + z + +# caseless tests + +/(?[ \p{Lu} ^ \p{Ll} ])/ + a + A +\= Expect no match + _ + 1 + +/(?[ [\p{Lu}1] ^ \p{Ll} ])/i + 1 +\= Expect no match + a + A + _ + +/(?[ [\p{Lu}1] & [\p{Ll}1] ])/ + 1 +\= Expect no match + a + A + _ + 2 + +/(?[ [\p{Lu}1] & [\p{Ll}1] ])/i + a + A + 1 +\= Expect no match + _ + 2 + +/(?[ \p{Lu} + \p{Ll} & [a-z] ])/utf + \x{0411} + a + A +\= Expect no match + \x{0431} + +/(?[ (\p{Lu} + \p{Ll}) & [a-z] ])/utf + a +\= Expect no match + \x{0411} + \x{0431} + A + +/(?[ [a-z] & \p{Lu} + \p{Ll} ])/utf + a + \x{0431} +\= Expect no match + \x{0411} + A + +/(?[ [a-z] & (\p{Lu} + \p{Ll}) ])/utf + a +\= Expect no match + \x{0431} + \x{0411} + A + +# -------------- + # End of testinput4 diff --git a/testdata/testinput5 b/testdata/testinput5 index 7e04873..a0d2cd5 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -75,6 +75,12 @@ # --------------------------------------------------------------------- +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. 
+/\65535/IB,utf,no_start_optimize + +/\65536/IB,utf,no_start_optimize + /\x{110000}/IB,utf /\o{4200000}/IB,utf @@ -333,6 +339,8 @@ /[[:a\x{100}b:]]/utf +/[\p{InvalidOrBadProperty}]/ + /a[^]b/utf,allow_empty_class,match_unset_backref a\x{1234}b a\nb @@ -854,8 +862,14 @@ /[\P{Any}]/B +/[^\P{Any}\P{Any}]/B + /[\P{Any}\E]/B +/\p{Any}#\P{Any}![\p{Any}]:[\P{Any}]@[\p{Any}a-z]%[\P{Any}c]/B,utf + +/[\P{Any}\P{Any}\P{Any}]![\p{Any}\p{Any}\p{Any}]:[^\P{Any}\P{Any}]@[^\p{Any}\p{Any}]%[^\p{Any}\P{Any}]/B,utf + /(\P{Yi}+\277)/ /(\P{Yi}+\277)?/ @@ -945,38 +959,6 @@ \x{2028} \x{200d} -# These are here because Perl has problems with the negative versions of the -# properties and has changed how it behaves for caseless matching. - -/\p{^Lu}/i,utf - 1234 -\= Expect no match - ABC - -/\P{Lu}/i,utf - 1234 -\= Expect no match - ABC - -/\p{Ll}/i,utf - a - Az -\= Expect no match - ABC - -/\p{Lu}/i,utf - A - a\x{10a0}B -\= Expect no match - a - \x{1d00} - -/\p{Lu}/i,utf - A - aZ -\= Expect no match - abc - /[\x{c0}\x{391}]/i,utf \x{c0} \x{e0} @@ -1735,8 +1717,8 @@ /[^\D\P{Nd}]/utf a9b - \x{1d7cf} \= Expect no match + \x{1d7cf} \x{10000} # Hex uses pattern length, not zero-terminated. This tests for overrunning @@ -2304,8 +2286,283 @@ \= Expect no match \x{212a}\x{212a} +/[sk](?r:[sk])[sk]/Bi,utf + SKS + sks + \x{212a}S\x{17f} + \x{17f}K\x{212a} +\= Expect no match + s\x{212a}s + K\x{17f}K + +/(.) \1/i,utf,caseless_restrict + s S + k K +\= Expect no match + s \x{17f} + k \x{212a} + +/(.) (?r:\1)/i,utf + s S + k K +\= Expect no match + s \x{17f} + k \x{212a} + +/(.) \1/i,utf + s S + k K + s \x{17f} + k \x{212a} + +/(?:(?ss)|(?kk)) \k/i,utf,dupnames,caseless_restrict + sS Ss + kK Kk +\= Expect no match + sS \x{17f}s + kK \x{212a}k + +/(?:(?ss)|(?kk)) \k/i,utf,dupnames + sS Ss + kK Kk + sS \x{17f}s + kK \x{212a}k + +/(?:(?s)|(?k)) \k{3,}!/i,utf,dupnames,caseless_restrict + s SsSs! + k KkKk! +\= Expect no match + s \x{17f}sSs\x{17f}! + k \x{212a}kKk\x{212a}! 
+ +/(?:(?s)|(?k)) \k{3,}!/i,utf,dupnames + s SsSs! + k KkKk! + s \x{17f}sSs\x{17f}! + k \x{212a}kKk\x{212a}! + # End caseless restrict tests +# TESTS for PCRE2_EXTRA_TURKISH_CASING - again, tests with and without. + +/i/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/i/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/I/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/I/i,utf,turkish_casing + I + \x{0131} +\= Expect no match + i + \x{0130} + +/\x{0130}/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/\x{0130}/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/\x{0131}/i,utf + \x{0131} +\= Expect no match + i + I + \x{0130} + +/\x{0131}/i,utf,turkish_casing + I + \x{0131} +\= Expect no match + i + \x{0130} + +/[i]/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/[i]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[^i]/i,utf + \x{0130} + \x{0131} +\= Expect no match + i + I + +/[^i]/i,utf,turkish_casing + I + \x{0131} +\= Expect no match + i + \x{0130} + +/[\x{0130}]/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/[\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[\x{0120}-\x{0130}]/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/[\x{0120}-\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[zi]/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/[zi]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[z\x{0130}]/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/[z\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[iI]/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/[iI]/i,utf,turkish_casing + i + I + \x{0130} + \x{0131} + +/[i\x{0130}]/i,utf + i + I + \x{0130} +\= Expect no match + \x{0131} + +/[i\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + 
\x{0131} + +/(.) \1/i,utf + i I +\= Expect no match + i \x{0130} + \x{0131} I + +/(*TURKISH_CASING)(.) \1/i,utf + i \x{0130} + \x{0131} I +\= Expect no match + i I + +/(.) \1/i,utf,turkish_casing + i \x{0130} + \x{0131} I +\= Expect no match + i I + +/i/i,utf,caseless_restrict,turkish_casing + +/i/i,turkish_casing + +/i/i,utf,caseless_restrict + i + +/i/i,ucp,caseless_restrict + i + +/b(?r:[\x{00FF}-\x{FFEE}])/i,utf,turkish_casing + b\x{0130} + b\x{0131} +\= Expect no match + bi + bI + bk + +/[\x60-\x7f]/i,ucp + i + I + +/[\x60-\xc0]/i,ucp + i + I + +/[\x80-\xc0]/i,ucp +\= Expect no match + i + I + +# End Turkish casing tests + # TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. # DIGITS @@ -2531,4 +2788,816 @@ /(?\777< + abc + +/a(?b)c/utf,substitute_extended + abc\=replace=>${namED_1}< + +/a(?b)c/utf,substitute_extended + abc\=replace=>${namedverylongbutperfectlylegalsoyoushouldnthaveaproblem_1}< + +/a(?b)c/utf,substitute_extended + abc\=replace=>${nämed}< + +/a(?b)c/utf,substitute_extended + abc\=replace=>${nämedverylongbutperfectlylegalsoyoushouldnthaveaproblem_Ù¢}< + +# python_octal + +/\400/utf + \o{400} + +/\400/utf,python_octal + +/abc/utf,substitute_extended + abc\=replace=\400 + +/abc/utf,substitute_extended,python_octal + abc\=replace=\400 + +# Character range merging tests + +/[\x{1200}\s\x{1202}\d\x{1201}]+/B,utf,ucp + \x{11ff}\x{1200}\x{1201}\x{1202}\x{1203} + +/[\x{2000}-\x{2500}\x{2100}-\x{2600}\d\x{1800}-\x{1fff}]+/B,utf,ucp + \x{17ff}\x{1800}\x{2600}\x{2601} + +/[\x{10008}\x{10003}\x{10006}\x{10004}\x{10007}]+/B,utf + \x{10002}\x{10005}\x{10003}\x{10004}\x{10006}\x{10007}\x{10008}\x{10009} + +/[\x{100}-\x{400}]+/Bi,utf + qS\x{ff}\x{100}\x{a7c5}\x{401} + \x{2c63}\x{2c64}\x{2c65}\x{2c66}\x{2c67} + \x{a7af}\x{a7b0}\x{a7b1}\x{a7b2}\x{a7b3} + +/[\x{100}-\x{400}\p{Ll}\x{500}-\x{700}\p{OldHungarian}\x{701}\p{bidiLRI}]/B,utf + +/[\pC\x{100}-\x{200}\h\pN]/B,utf + +/[\pC\x{100}-\x{200}\v\pN]/B,utf + +/[\pC\x{100}-\x{200}\H\pN]/B,utf + 
+/[\pC\x{100}-\x{200}\V\pN]/B,utf + +/[\x{16e49}-\x{16e4f}\x{20000}\x{16e40}-\x{16e48}\pN]/Bi,utf + +/[\x80-\x{4000}\x90\x{400}-\x{f000}\xa0\x{4000}-\x{10ffff}]++/B,utf + \x{7f}\x{80}\x{100}\x{10fffe}\x{10ffff}\x00 + +/[\x80-\x{4000}\x90\x{400}-\x{f000}\xa0\pN\x{4000}-\x{10ffff}]++/B,utf + \x{7f}\x{80}\x{100}090\x{10fffe}\x{10ffff}\x00 + +/[\x00-\x{4000}\x{2000}-\x{10ffff}]++/B,utf + abcd + +/[abc\p{Any}]{5,7}/B,utf + xyz + +/[^\p{Any}\x34\p{Any}]*cat/B,utf + cat + +/[\pN\xf0-\x{10ffff}]{5,8}/B,utf + ab0123456cd + +/[\x00-\x{398}\x{39a}-\x{10ffff}]*#(?i)[\x00-\x{398}\x{39a}-\x{10ffff}]*?#/B,utf + abcd#efg# + +# Freeing memory on error test +/[\x{100}-\x{400}][\x{100}-\x{300}][\x{100}-\x{200}]\8/i,utf + +# Character list tests + +/[\x{100}-\x{7fff}\x{d7b0}\x{d7b1}\x{d7b3}\x{d7b4}\x{d7b6}\x{d7b7}\x{d7b9}\x{d7ba}]{12}/B,utf + \x{8000}\x{d7af}\x{d7b2}\x{d7b5}\x{d7b8}\x{d7bb}\x{100}\x{800}\x{7000}\x{7fff}\x{d7b0}\x{d7b1}\x{d7b3}\x{d7b4}\x{d7b6}\x{d7b7}\x{d7b9}\x{d7ba}\x{100} + +/([\x{6535}\x{6536}\x{6538}\x{6539}\x{653b}\x{653c}\x{653e}\x{653f}\x{6541}\x{6542}\x{8000}-\x{ffff}]#)+/B,utf + \x{6534}#\x{6537}#\x{653a}#\x{653d}#\x{6540}#\x{6543}#\x{7fff}#\x{6535}#\x{6536}#\x{6538}#\x{6539}#\x{653b}#\x{653c}#\x{653e}#\x{653f}#\x{6541}#\x{6542}#\x{8000}#\x{c246}#\x{ffff} + +/[[:xdigit:]\x{400}-\x{600}]+/utf,ucp + !a0\x{400}\x{600}9\x{3ff} + +/[^[:xdigit:]\x{400}-\x{600}]+/utf,ucp + \x{400}(\x{3ff}\x{601})\x{600} + +/[[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp + !A0\x{700}9\x{601} + +/[^[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp + \x{600}(\x{6ff}\x{701}\x{3ff}\x{601})\x{700} + +/[[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp + !f0\x{800}\x{600}9\x{601} + +/[^[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp + \x{700}[\x{3ff}\x{601}\x{6ff}\x{801}\x{8ff}\x{901}]\x{900} + +/[[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp + !F0\x{400}\x{410}\x{500}\x{600}\x{610}\x{700}\x{800}\x{810}9\x{7ff} + 
+/[^[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp + \x{800}<\x{3ff}\x{411}\x{4ff}\x{501}\x{5ff}\x{611}\x{6ff}\x{701}\x{7ff}\x{811}>\x{810} + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +/[\p{Lu}[\p{Nd}]]/B,alt_extended_class + 0 + C +\= Expect no match + [ + a + +/[[\pL][\p{Nd}]]/B,alt_extended_class + 0 + a +\= Expect no match + [ + ] + +/[[\p{Lu}]||[\p{Nd}]]/B,alt_extended_class + A + 1 +\= Expect no match + a + +/[[^\pL][\p{Nd}]]/B,alt_extended_class + 0 + . +\= Expect no match + A + +/[^[\pL][\p{Nd}]]/B,alt_extended_class + . +\= Expect no match + A + 0 + +/[^[\pL]&&[\p{Nd}]]/B,alt_extended_class + A + 0 + +/[[\p{Lu}\p{Ll}]||[\p{Nd}\p{Ll}]]/B,alt_extended_class + A + 1 + c +\= Expect no match + _ + +/[[\p{Lu}\p{Ll}]&&[\p{Nd}\p{Ll}]]/B,alt_extended_class + c +\= Expect no match + A + 1 + _ + +/[[\p{Lu}\p{Ll}]--[\p{Nd}\p{Ll}]]/B,alt_extended_class + A +\= Expect no match + 1 + c + _ + +/[[\p{Lu}\p{Ll}]~~[\p{Nd}\p{Ll}]]/B,alt_extended_class + A + 1 +\= Expect no match + c + _ + +/[\pL[]]]/B,alt_extended_class + A + ] +\= Expect no match + [ + +/[\pL[^]]]/B,alt_extended_class + A + [ + 0 +\= Expect no match + ] + +/[\pL[]]/B,alt_extended_class,allow_empty_class + A +\= Expect no match + ] + [ + +/[\pL[^]]/B,alt_extended_class,allow_empty_class + A + 0 + [ + ] + +/[\dAC-E[:space:]\p{Lu}&&[^z]]/B,alt_extended_class + 0 + A + C + D + E + \t +\= Expect no match + a + ; + +/[z||[^\dAC-E[:space:]\p{Lu}]]/B,alt_extended_class + z + ; +\= Expect no match + 0 + A + C + D + E + B + F + \t + +/[\p{Lu}\p{Nd}||cd]/B,alt_extended_class + A + 0 + c +\= Expect no match + e + +/[[\p{Lu}]\p{Nd}||[c]d]/B,alt_extended_class + A + 0 + c +\= Expect no match + e + +/[\p{Lu}[\p{Nd}]||c[d]]/B,alt_extended_class + A + 0 + c +\= Expect no match + e + +/[\p{Lu}-]/B,alt_extended_class + A + - +\= Expect no match + a + +/[-\p{Lu}]/B,alt_extended_class + A + - +\= Expect no match + a + +/[\pL-]/B,alt_extended_class + A + - +\= Expect no match + 
0 + +/[-\pL]/B,alt_extended_class + A + - +\= Expect no match + 0 + +/[\p{Lu}-]/B + A + - +\= Expect no match + a + +/[-\p{Lu}]/B + A + - +\= Expect no match + a + +/[\pL-]/B + A + - +\= Expect no match + 0 + +/[-\pL]/B + A + - +\= Expect no match + 0 + +/[\p{Lu}-z]/B,alt_extended_class + +/[z-\p{Lu}]/B,alt_extended_class + +/[\pL-z]/B,alt_extended_class + +/[z-\pL]/B,alt_extended_class + +/[\p{Lu}-&&-\pL]/B,alt_extended_class + - + A +\= Expect no match + a + +/[-\p{Lu}&&\pL-]/B,alt_extended_class + - + A +\= Expect no match + a + +/[[\p{Lu}]-&&-[\pL]]/B,alt_extended_class + - + A +\= Expect no match + a + +/[-[\p{Lu}]&&[\pL]-]/B,alt_extended_class + - + A +\= Expect no match + a + +/(?xx:[ ^ 5[ ^ \p{Nd}] ])/B,alt_extended_class + 4 +\= Expect no match + a + ; + 5 + +/(?xx:[ ^ \p{Nd}[ ^ 5] ])/B,alt_extended_class +\= Expect no match + a + ; + 4 + 5 + +/(?xx:[ ^ \p{Nd}[ ^ \p{Nd}] ])/B,alt_extended_class +\= Expect no match + a + ; + 4 + 5 + +/[ ^ \p{Ll}[ ^ \p{Nd}] ]/B,alt_extended_class + \x20 + ^ + a + 0 +\= Expect no match + A + ; + +/[a-c--\p{Nd}]+/B,alt_extended_class + ac + a +\= Expect no match + 0 + +/[a-c--\p{Nd}]{2,3}/B,alt_extended_class + ac + cac +\= Expect no match + a + 00 + +/x[a-c--\p{Nd}]+y/B,alt_extended_class + xacy + xaay + xay +\= Expect no match + zacy + xacz + xy + x0y + +/[\pL--\pL--\pL]/B,alt_extended_class +\= Expect no match + A + 1 + +/[[\pL--\pL]--\pL]/B,alt_extended_class +\= Expect no match + A + 1 + +/[\pL--[\pL--\pL]]/B,alt_extended_class + A +\= Expect no match + 1 + +/[\pL--^\p{Nd}]/B,alt_extended_class + A +\= Expect no match + 1 + ^ + +/([a-z--[\pL&&n]])\1/B,alt_extended_class + aa + zz +\= Expect no match + az + nn + +/(x[a-z--[\pL&&n]]y)\1/B,alt_extended_class + xayxay + xzyxzy +\= Expect no match + xnyxny + +/(?:_\1|([a-z--[\pL&&n]])){2}/B,alt_extended_class + a_a + z_z +\= Expect no match + a_z + n_n + +/(?:_\1|([a-z--[\pL&&n]]))+/B,alt_extended_class + a_a + z_z + a_partial +\= Expect no match + n_n + 
+/[\p{Nd}||[\pL--\p{Lu}]]/B,alt_extended_class + a + 0 +\= Expect no match + C + +/[\P{Nd}||2]/B,alt_extended_class + _ + Z + 2 +\= Expect no match + 1 + 3 + +/[^[\P{Nd}]]/B,alt_extended_class + 1 + 2 +\= Expect no match + _ + z + +# caseless tests + +/[\p{Lu}~~\p{Ll}]/B,alt_extended_class + a + A +\= Expect no match + _ + 1 + +/[[\p{Lu}1]~~\p{Ll}]/iB,alt_extended_class + 1 +\= Expect no match + a + A + _ + +/[[\p{Lu}1]&&[\p{Ll}1]]/B,alt_extended_class + 1 +\= Expect no match + a + A + _ + 2 + +/[[\p{Lu}1]&&[\p{Ll}1]]/iB,alt_extended_class + a + A + 1 +\= Expect no match + _ + 2 + \ + +/[\p{Thai}&&\p{Nd}]/B,utf,alt_extended_class + \x{0e51} +\= Expect no match + 0 + a + \x{0e01} + +/[\p{Thai}||\p{Nd}]/B,utf,alt_extended_class + \x{0e51} + \x{0e01} + 0 +\= Expect no match + a + +/[\p{Thai}~~\p{Nd}]/B,utf,alt_extended_class + \x{0e01} + 0 +\= Expect no match + \x{0e51} + a + +/[[\p{Thai}&&\p{Nd}]~~[^a]]/B,utf,alt_extended_class + \x{0e01} + b + 0 +\= Expect no match + a + \x{0e51} + +/^[\p{Thai}&&\p{Nd}]?$/B,utf,alt_extended_class + \x{0e51} + \ +\= Expect no match + a + +/^[\p{Thai}&&\p{Nd}]??$/B,utf,alt_extended_class + \x{0e51} + \ +\= Expect no match + a + +/^[\p{Thai}&&\p{Nd}]?+$/B,utf,alt_extended_class + \x{0e51} + \ +\= Expect no match + a + +/^[\p{Thai}&&\p{Nd}]{3}$/B,utf,alt_extended_class + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51} + \ + a + +/^[\p{Thai}&&\p{Nd}]{3,}$/B,utf,alt_extended_class + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51} + \ + a + +/^[\p{Thai}&&\p{Nd}]{3,}?$/B,utf,alt_extended_class + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51} + \ + a + +/^[\p{Thai}&&\p{Nd}]{3,}+$/B,utf,alt_extended_class + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51} + \ + a + +/^[\p{Thai}&&\p{Nd}]{,3}$/B,utf,alt_extended_class + \ + \x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + 
\x{0e51}\x{0e51}\x{0e51}\x{0e51} + a + +/^[\p{Thai}&&\p{Nd}]{,3}?$/B,utf,alt_extended_class + \ + \x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + a + +/^[\p{Thai}&&\p{Nd}]{,3}+$/B,utf,alt_extended_class + \ + \x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + a + +/^[\p{Thai}&&\p{Nd}]+\x{0e51}$/B,utf,alt_extended_class + \x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51} + \ + a + +/^[\p{Thai}&&\p{Nd}]+?\x{0e51}$/B,utf,alt_extended_class + \x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \x{0e51} + \ + a + +/^[\p{Thai}&&\p{Nd}]++\x{0e51}$/B,utf,alt_extended_class +\= Expect no match + \x{0e51} + \x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} + \ + a + +/^[\p{Thai}&&\p{Nd}]*\x{0e51}$/B,utf,alt_extended_class + \x{0e51} + \x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \ + a + +/^[\p{Thai}&&\p{Nd}]*?\x{0e51}$/B,utf,alt_extended_class + \x{0e51} + \x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} +\= Expect no match + \ + a + +/^[\p{Thai}&&\p{Nd}]*+\x{0e51}$/B,utf,alt_extended_class +\= Expect no match + \x{0e51} + \x{0e51}\x{0e51} + \x{0e51}\x{0e51}\x{0e51} + \ + a + +/[^[^\p{Thai}]]/B,utf,alt_extended_class + \x{0e51} +\= Expect no match + a + +/[^[^\p{L}]]/B,utf,alt_extended_class + \x{0e01} + a +\= Expect no match + 0 + +/[\pL&&[^\x00-\xFF]]/B,utf,alt_extended_class + \x{21e} +\= Expect no match + a + +/[\pL&&\x{100}-\x{1000}]{3,6}+/utf,alt_extended_class + \x{145}\x{18b}A\x{145}\x{18b}\x{1C2}\x{21a}\x{257}\x{2ae}\x{0145}\x{18b} + \x{145}A\x{145}\x{18b}\x{1C2}B + +/[\pL&&\x{100}-\x{1000}]{3,6}\x{2A3}/utf,alt_extended_class + \x{145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{2a3} + \x{145}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3} + \x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3}\x{2a3} + \x{0145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{145}\x{2a3} + +/[\pL&&\x{100}-\x{1000}]{3,6}?\x{2A3}/utf,alt_extended_class 
+ \x{145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{2a3} + \x{145}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3} + \x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3}\x{2a3} + \x{0145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{145}\x{2a3} + +/[\P{scx=Beng}\P{scx=Deva}\pM--[\x{2000}-\x{3000}]]+/utf,alt_extended_class + \x{964}\x{2000}\x{3000}A\x{951}\x{1fff}\x{3001}\x{965} + +/[\p{Thai}~~[^]]/B,utf,alt_extended_class,allow_empty_class + \x{0d01} + a +\= Expect no match + \x{0e01} + +/[[]~~[^]]/B,utf,alt_extended_class,allow_empty_class + \x{0d01} + a + +/[[^]~~[]]/B,utf,alt_extended_class,allow_empty_class + \x{0d01} + a + +/[[^]~~[^]]/B,utf,alt_extended_class,allow_empty_class +\= Expect no match + \x{0d01} + a + +/[[^]||\pL]/B,utf,alt_extended_class,allow_empty_class + 0 + a + +/[\pL||[^]]/B,utf,alt_extended_class,allow_empty_class + 0 + a + +/[\pL~~[^]]/B,utf,alt_extended_class,allow_empty_class + 0 +\= Expect no match + a + +/[[^]~~\pL]/B,utf,alt_extended_class,allow_empty_class + 0 +\= Expect no match + a + +/([\p{Lu}&&\p{sc=Hung}]+?\x{10c81})+#/utf,alt_extended_class + \x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10cf2}\x{10c81}#\x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10c81}## + +/[[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] 
+&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] 
+&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] 
+&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]]/utf,alt_extended_class + +# -------------- + +/^([\h\x{9000}\x{9002}\x{9004}][\v\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}][\h\v\x{9000}],){4}$/B,utf + \x09\x0a\x0d,\x{1680}\x{2028}\x{1680},\x{180e}\x{2029}\x{180e},\x{9000}\x{9000}\x{9000}, + +/[z-\p{Lu}]/ + +/[z-\pL]/ + +/[\p{Lu}-z]/ + +/[\pL-z]/ + +/[a\x{e1}]/iB + a + A + \x{e1} + +/[a\x{e1}]/iB,utf + a + A + \x{e1} + \x{c1} + +/[a\x{e1}]/iB,ucp + a + A + \x{e1} + \x{c1} + +/[a\x{e1}]/iB,ucp,utf + a + A + \x{e1} + \x{c1} + # End of testinput5 diff --git a/testdata/testinput6 b/testdata/testinput6 index f189239..1fbe4ce 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4308,7 +4308,6 @@ ab /a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames -\= Expect no match ab /a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames @@ -4392,9 +4391,18 @@ /Z(*F)Q|ZXY/ Z\=ps + XY\=dfa_restart \= Expect no match ZA\=ps X\=ps + +/Z(?:(*F)Q|XY)/ + Z\=ps + XY\=dfa_restart + +/Z(*F)Q|Z(*F)XY/ +\= Expect no match + Z\=ps /\bthe cat\b/ the cat\=ps @@ -5042,4 +5050,154 @@ /|a(?0)/endanchored aaaa +/([a-z]++)(*scs:(1).)/ + aa + +# -------------- + +# EXTENDED CHARACTER CLASSES 
(UTS#18) + +/[a[]/ + [ + +/[a[B]]C/alt_extended_class + aC + BC +\= Expect no match + [C + +/[[A][B]]/alt_extended_class + A + B +\= Expect no match + [ + ] + +/[[A]||[B]]/alt_extended_class + A + B +\= Expect no match + C + +/[[^A][B]]/alt_extended_class + B + C +\= Expect no match + A + +/[^[A][B]]/alt_extended_class + C +\= Expect no match + A + B + +/[^[A]&&[B]]/alt_extended_class + A + B + C + +/[A[]]]/alt_extended_class + A + ] +\= Expect no match + [ + +/[A[^]]]/alt_extended_class + A + [ + C +\= Expect no match + ] + +/[A[]]/alt_extended_class,allow_empty_class + A +\= Expect no match + ] + [ + +/[A[^]]/alt_extended_class,allow_empty_class + A + C + [ + ] + +/[A-C--B]/alt_extended_class + A + C +\= Expect no match + B + +/[^A-C--B]/alt_extended_class + B +\= Expect no match + A + C + +/[[\d\D]--b]/alt_extended_class + a + c +\= Expect no match + b + +/[\dAC-E[:space:]&&[^z]]/alt_extended_class + 0 + A + C + D + E + \t +\= Expect no match + B + F + ; + +/[z||[^\dAC-E[:space:]]]/alt_extended_class + z + B + F + ; +\= Expect no match + 0 + A + C + D + E + \t + +/[a-c--b]+/alt_extended_class + ac + a +\= Expect no match + b + +/[a-c--b]{2,3}/alt_extended_class + ac + cac +\= Expect no match + a + bb + +/x[a-c--b]+y/alt_extended_class + xacy + xaay + xay +\= Expect no match + zacy + xacz + xy + xby + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +/(?[[A]+[B]])/ + A + B +\= Expect no match + [ + ] + +# -------------- + # End of testinput6 diff --git a/testdata/testinput7 b/testdata/testinput7 index 896019f..97ffa37 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -1076,13 +1076,6 @@ \= Expect no match \x{660}\x{661}\x{662}ABC -/\p{Lu}/i,utf - A - a\x{10a0}B -\= Expect no match - a - \x{1d00} - /\p{^Lu}/i,utf 1234 \= Expect no match @@ -1156,30 +1149,6 @@ a A -/\p{Lu}/utf - A - aZ -\= Expect no match - abc - -/\p{Lu}/i,utf - A - aZ -\= Expect no match - abc - -/\p{Ll}/utf - a - Az -\= Expect no match - ABC - -/\p{Ll}/i,utf - a - Az -\= 
Expect no match - ABC - /^\x{c0}$/i,utf \x{c0} \x{e0} @@ -1666,7 +1635,7 @@ !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++ -# Without PCRE_UCP, non-ASCII always fail, even if < 256 +# Without PCRE2_UCP, non-ASCII always fail, even if < 256 /\b...\B/utf abc_ @@ -1676,7 +1645,7 @@ !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++ -# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties +# With PCRE2_UCP, non-UTF8 chars that are < 256 still check properties /\b...\B/ucp abc_ @@ -2328,6 +2297,163 @@ # End caseless restrict tests +# TESTS for PCRE2_EXTRA_TURKISH_CASING - again, tests with and without. + +/i/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/i/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/I/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/I/i,utf,turkish_casing + I + \x{0131} +\= Expect no match + i + \x{0130} + +/\x{0130}/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/\x{0130}/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/\x{0131}/i,utf + \x{0131} +\= Expect no match + i + I + \x{0130} + +/\x{0131}/i,utf,turkish_casing + I + \x{0131} +\= Expect no match + i + \x{0130} + +/[i]/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/[i]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[\x{0130}]/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/[\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[\x{0120}-\x{0130}]/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/[\x{0120}-\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[zi]/i,utf + i + I +\= Expect no match + \x{0130} + \x{0131} + +/[zi]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[z\x{0130}]/i,utf + \x{0130} +\= Expect no match + i + I + \x{0131} + +/[z\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +/[iI]/i,utf + i + I +\= Expect no match + 
\x{0130} + \x{0131} + +/[iI]/i,utf,turkish_casing + i + I + \x{0130} + \x{0131} + +/[i\x{0130}]/i,utf + i + I + \x{0130} +\= Expect no match + \x{0131} + +/[i\x{0130}]/i,utf,turkish_casing + i + \x{0130} +\= Expect no match + I + \x{0131} + +# End Turkish casing tests + # TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. # DIGITS @@ -2531,4 +2657,89 @@ /caf\B.+?\B/utf,ucp --cafe\x{300}_au\x{203f}lait! +# -------------------------------------------------------------------------- +# Case-independent matching property tests added after changing PCRE2 to be +# compatible with Perl. All three cases (upper, lower, title) conflate. + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/utf + >AbbD< + >Abb\x{01c5}< +\= Expect no match + >aBBd< + >aB!!< + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf + >aB!!< +\= Expect no match + >AbbD< + >aBBd< + >Abb\x{01c5}< + +/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf + >aB!!< +\= Expect no match + >AbbD< + >aBBd< + >Abb\x{01c5}< + +# -------------- + +# EXTENDED CHARACTER CLASSES + +/[\p{Ll}[\p{Nd}]]C/alt_extended_class + aC + 1C +\= Expect no match + [C + +/[[\p{Ll}][\p{Nd}]]/alt_extended_class + a + 1 +\= Expect no match + [ + ] + +/[[\p{Ll}]||[\p{Nd}]]/alt_extended_class + a + 1 +\= Expect no match + C + +/[[^\p{Ll}][\p{Nd}]]/alt_extended_class + 1 + A +\= Expect no match + a + +/[^[\p{Ll}][\p{Nd}]]/alt_extended_class + A +\= Expect no match + a + 1 + +/[^[\p{Ll}]&&[\p{Nd}]]/alt_extended_class + a + 1 + A + +/(?[[\p{Ll}]+[\p{Nd}]])/ + a + 1 +\= Expect no match + [ + ] + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +/(?[[\p{Ll}Z]&[\p{Lu}a]])/ + a + Z +\= Expect no match + A + z + +# -------------------------------------------------------------------------- + # End of testinput7 diff --git a/testdata/testinput9 b/testdata/testinput9 index 4eb228a..0f3e0a8 100644 --- a/testdata/testinput9 +++ b/testdata/testinput9 @@ -4,11 +4,19 @@ #forbid_utf #newline_default lf any anycrlf -/ab/ -\= Expect error message (too big char) and no match - 
A\x{123}B - A\o{443}B - +/a\xc4\xa3b/ + a\N{U+123}b +\= Expect no match # error message (too big char) + a\x{0123}b + a\o{00443}b + a\443b + +/fd bf bf bf bf bf/I,hex +\= Expect warning + \N{U+7fffffff} +\= Expect no match # error message (too big char) + \x{7fffffff} + /\x{100}/I /\o{400}/I @@ -263,4 +271,14 @@ /(?i:A{1,}\6666666666)/ A\x{1b6}6666666 +# Should cause an error +/abc/substitute_extended,replace=>\777< + abc + +# Should cause an error +/abc/substitute_extended,replace=>\o{012345}< + abc + +/i/turkish_casing + # End of testinput9 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 753937f..4563f52 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -8174,6 +8174,19 @@ MK: B D No match, mark = B +/(*COMMIT)ABC/no_start_optimize + ABC + 0: ABC +\= Expect no match + DEFABC +No match + +/(*COMMIT)ABC/ + ABC + 0: ABC + DEFABC + 0: ABC + # This should fail, as the skip causes a bump to offset 3 (the skip). /A(*MARK:A)A+(*SKIP)(B|Z) | AC/x,mark @@ -8919,10 +8932,18 @@ No match 1: 2: B 3: A - -/^A\xZ/ - A\0Z - 0: A\x00Z + +/^A\xBz/ + A\x{0B}z + 0: A\x0bz + +/^A\xABz/ + A\x{AB}z + 0: A\xabz + +/^A\xABCz/ + A\x{AB}Cz + 0: A\xabCz /^A\o{123}B/ A\123B @@ -9243,14 +9264,6 @@ No match /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ -/[s[:digit:]\E-H]+/ - s09-H - 0: s09-H - -/[s[:digit:]\Q\E-H]+/ - s09-H - 0: s09-H - /a+(?:|b)a/ aaaa 0: aaaa @@ -10159,7 +10172,22 @@ No match /\214748364/ >\x{8c}748364< 0: \x8c748364 - + +# smaller than GROUP_MAX +/\21300/ + \x8b00 + 0: \x8b00 + +# larger than GROUP_MAX +/\213000/ + \x8b000 + 0: \x8b000 + +# larger than INT_MAX +/\21300000000/ + \x8b00000000 + 0: \x8b00000000 + /a{65536/ >a{65536< 0: a{65536 @@ -10503,4 +10531,595 @@ No match a No match +/\Qab*\E{2,}/ + ab***z + 0: ab*** + +/[\Qabc\E-z]+/ + abcdwxyz + 0: abcdwxyz + +/[\Qa-\Ez]+/ + xz-zaax + 0: z-zaa + +/a{(?#XYZ),2}/ + xa{,2}x + 0: a{,2} +\= Expect no match + xaax +No match + +/(?<=PQ|Pc.b?)(.?)(b?)/ + Pc.b + 0: b + 1: b + 2: + +/(?(?<=aa.b|ab)b).b/ + 
aaab + 0: ab + +/(?(?<=a(?:a.b|b))b).b/ + aaab + 0: ab + +/(?=a)b?a/ + a + 0: a + +/(?=a)b?a./ + ab + 0: ab + +/\w(?R)*\w/ + grtgt + 0: grtg + abcdef + 0: abcdef + abcdefg + 0: abcdef + .a.bc.d. + 0: bc +\= Expect no match + .a.b.c. +No match + +/65 00 64/hex + e\0d + 0: e\x00d + +/[[:digit:]- ]/xx + 1 + 0: 1 + - + 0: - +\= Expect no match + z +No match + \ \ +No match + +/[\d- ]/xx + 1 + 0: 1 + - + 0: - +\= Expect no match + z +No match + \ \ +No match + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +/(?[\n])/ + \n + 0: \x0a +\= Expect no match + \\ +No match + n +No match + +/^(?[\x61])b/ + ab + 0: ab +\= Expect no match + b +No match + a +No match + +/^(?[\x61])+b/ + ab + 0: ab + aab + 0: aab +\= Expect no match + b +No match + +/(?[ [[:graph:]] ])/ + a + 0: a +\= Expect no match + \x01 +No match + +/(?[ [:graph:] ])/ + a + 0: a +\= Expect no match + \x01 +No match + +/(?[ [[:graph:]\x02] ])/ + a + 0: a + \x02 + 0: \x02 +\= Expect no match + \x01 +No match + +/(?[\E\n])/ + \n + 0: \x0a +\= Expect no match + \\ +No match + E +No match + +/(?[\n \Q\E])/ + \n + 0: \x0a +\= Expect no match + \\ +No match + Q +No match + +/(?[ ( \x02 + [:graph:] ) | [ \x02 [:graph:] ] ])/ + a + 0: a + \x02 + 0: \x02 +\= Expect no match + \x01 +No match + +/(?[ \d ])/ + 1 + 0: 1 +\= Expect no match + d +No match + +/(?[[1]])/ + 1 + 0: 1 +\= Expect no match + ] +No match + +/(?[[a]])/ + a + 0: a +\= Expect no match + ] +No match + +/(?[[a-c]])/ + a + 0: a + b + 0: b +\= Expect no match + - +No match + ] +No match + +/(?[ [\t] + [\n] ])/ + \t + 0: \x09 + \n + 0: \x0a +\= Expect no match + t +No match + \\ +No match + [ +No match + +/(?[ \t + \n ])/ + \t + 0: \x09 + \n + 0: \x0a +\= Expect no match + t +No match + \\ +No match + [ +No match + +/(?[ [()] ])/ + ) + 0: ) + ( + 0: ( +\= Expect no match + ] +No match + +/(?[ ( [()] ) ])/ + ) + 0: ) + ( + 0: ( +\= Expect no match + ] +No match + +/(?[ (( [\n\t] )) ])/ + \n + 0: \x0a + \t + 0: \x09 +\= Expect no match + ) +No match 
+ ( +No match + t +No match + +# Each syntax element, with unary operator applied to it + +/(?[ !\n ])/ + z + 0: z +\= Expect no match + \n +No match + +/(?[ !\d ])/ + a + 0: a +\= Expect no match + 1 +No match + +/(?[ ![:alpha:] ])/ + 1 + 0: 1 +\= Expect no match + a +No match + +/(?[ ![\n] ])/ + z + 0: z +\= Expect no match + \n +No match + +/(?[ !(\n) ])/ + z + 0: z +\= Expect no match + \n +No match + +/(?[ !!\n ])/ + \n + 0: \x0a +\= Expect no match + z +No match + +# Each syntax element, as contents of parens + +/(?[ (\n) ])/ + \n + 0: \x0a +\= Expect no match + z +No match + +/(?[ (\d) ])/ + 1 + 0: 1 +\= Expect no match + a +No match + +/(?[ ([:alpha:]) ])/ + a + 0: a +\= Expect no match + 1 +No match + +/(?[ ([\n]) ])/ + \n + 0: \x0a +\= Expect no match + z +No match + +/(?[ ((\n)) ])/ + \n + 0: \x0a +\= Expect no match + z +No match + +/(?[ (!\n) ])/ + z + 0: z +\= Expect no match + \n +No match + +/(?[ (\n + \t) ])/ + \n + 0: \x0a + \t + 0: \x09 +\= Expect no match + z +No match + +# Each syntax element, as LHS of a binary operator + +/(?[ \n & [\n\t] ])/ + \n + 0: \x0a +\= Expect no match + t +No match + +/(?[ \d & [\d\t] ])/ + 1 + 0: 1 +\= Expect no match + a +No match + +/(?[ [:alpha:] & [a-z\t] ])/ + a + 0: a +\= Expect no match + A +No match + \t +No match + +/(?[ [\n] & [\n\t] ])/ + \n + 0: \x0a +\= Expect no match + \t +No match + +/(?[ (\n) & [\n\t] ])/ + \n + 0: \x0a +\= Expect no match + \t +No match + +/(?[ !\n & [^\n\t] ])/ + a + 0: a +\= Expect no match + \n +No match + \t +No match + +/(?[ \n & [\n\t] + [\d] ])/ + \n + 0: \x0a + 1 + 0: 1 +\= Expect no match + \t +No match + a +No match + +# Each syntax element, as RHS of a binary operator + +/(?[ [\n\t] & \n ])/ + \n + 0: \x0a +\= Expect no match + t +No match + +/(?[ [\d\t] & \d ])/ + 1 + 0: 1 +\= Expect no match + a +No match + +/(?[ [a-z\t] & [:alpha:] ])/ + a + 0: a +\= Expect no match + A +No match + \t +No match + +/(?[ [\n\t] & [\n] ])/ + \n + 0: \x0a +\= Expect no match + \t +No 
match + +/(?[ [\n\t] & (\n) ])/ + \n + 0: \x0a +\= Expect no match + \t +No match + +/(?[ [^\n\t] & !\n ])/ + a + 0: a +\= Expect no match + \n +No match + \t +No match + +/(?[ [\d] + \n & [\n\t] ])/ + \n + 0: \x0a + 1 + 0: 1 +\= Expect no match + \t +No match + a +No match + +/(?[ [\d] + \n + [\t] ])/ + \n + 0: \x0a + \t + 0: \x09 + 1 + 0: 1 +\= Expect no match + a +No match + +# end op surrounding syntax tests + +/(?[ \d + \n ])/ + \n + 0: \x0a + 1 + 0: 1 +\= Expect no match + a +No match + +/(?[ \d | \n ])/ + \n + 0: \x0a + 1 + 0: 1 +\= Expect no match + a +No match + +/(?[ \d - [2] ])/ + 1 + 0: 1 + 3 + 0: 3 +\= Expect no match + 2 +No match + +/(?[ [AC] ^ [BC] ])/ + A + 0: A + B + 0: B +\= Expect no match + C +No match + D +No match + +/(?[ ( [ ^ z ] ) ])/ + j + 0: j +\= Expect no match + z +No match + +/^.{4}/s + abcdef + 0: abcd + abcde + 0: abcd + abcd + 0: abcd +\= Expect no match + abc +No match + ab +No match + a +No match + +/^(.{3,6}!)+$/s + abc!defghi! + 0: abc!defghi! + 1: defghi! + abcdef!ghi! + 0: abcdef!ghi! + 1: ghi! + abc!def!ghi!jkl! + 0: abc!def!ghi!jkl! + 1: jkl! + ab!cd! + 0: ab!cd! + 1: ab!cd! +\= Expect no match + abcd!ef! +No match + ab!cdefg! 
+No match + +/[a-z]{5,}b|x/ + abcdefghbijb + 0: abcdefghbijb + abcdefghbij + 0: abcdefghb + abcdeb + 0: abcdeb + abcdefghijx + 0: x +\= Expect no match + abcdb +No match + abcdefghijk +No match + +/[a-z]{1,6}?s|x/ + asbs + 0: as + abcdefs + 0: abcdefs + abcdefghijkss + 0: fghijks + abcdefghijkx + 0: x + ss + 0: ss +\= Expect no match + s +No match + aaa +No match + +# -------------- + # End of testinput1 diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 1cf7584..d1bccf3 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -461,22 +461,22 @@ Subject length lower bound = 1 /[^ab\xC0-\xF0]/IB,utf ------------------------------------------------------------------ Bra - [\x00-`c-\xbf\xf1-\xff] (neg) + [^ab\xc0-\xf0] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 - \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf - \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee - \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd - \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 + \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf + \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee + \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd + \xfe \xff Subject length lower bound = 1 \x{f1} 0: \x{f1} @@ -492,22 +492,6 @@ No match \x{f0} No match -/Ā{3,4}/IB,utf ------------------------------------------------------------------- - Bra - \x{100}{3} - \x{100}?+ - Ket - End ------------------------------------------------------------------- -Capture group count = 0 -Options: utf -First code unit = \xc4 -Last code unit = \x80 -Subject length lower bound = 3 - \x{100}\x{100}\x{100}\x{100\x{100} - 0: \x{100}\x{100}\x{100} - /(\x{100}+|x)/IB,utf ------------------------------------------------------------------ Bra @@ -521,7 +505,7 @@ Subject length lower bound = 3 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: x \xc4 +Starting code units: x \xc4 Subject length lower bound = 1 /(\x{100}*a|x)/IB,utf @@ -538,7 +522,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a x \xc4 +Starting code units: a x \xc4 Subject length lower bound = 1 /(\x{100}{0,2}a|x)/IB,utf @@ -555,7 +539,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a x \xc4 +Starting code units: a x \xc4 Subject length lower bound = 1 /(\x{100}{1,2}a|x)/IB,utf @@ -573,7 +557,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code 
units: x \xc4 +Starting code units: x \xc4 Subject length lower bound = 1 /\x{100}/IB,utf @@ -620,7 +604,7 @@ Subject length lower bound = 3 /[^\x{c4}]/IB ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -664,7 +648,7 @@ Subject length lower bound = 1 /[^\xff]/IB,utf ------------------------------------------------------------------ Bra - [^\x{ff}] + [^\x{ff}] (not) Ket End ------------------------------------------------------------------ @@ -778,7 +762,7 @@ Subject length lower bound = 3 /\h/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3 +Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3 Subject length lower bound = 1 ABC\x{09} 0: \x{09} @@ -802,7 +786,7 @@ Subject length lower bound = 1 /\v/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 +Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 Subject length lower bound = 1 ABC\x{0a} 0: \x{0a} @@ -820,7 +804,7 @@ Subject length lower bound = 1 /\h*A/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 +Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 Last code unit = 'A' Subject length lower bound = 1 CDBABC @@ -829,21 +813,21 @@ Subject length lower bound = 1 /\v+A/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 +Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 Last code unit = 'A' Subject length lower bound = 2 /\s?xxx\s/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x Last code unit = 'x' Subject length lower bound = 4 /\sxxx\s/I,utf,tables=2 Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 Last 
code unit = 'x' Subject length lower bound = 5 AB\x{85}xxx\x{a0}XYZ @@ -854,15 +838,15 @@ Subject length lower bound = 5 /\S \S/I,utf,tables=2 Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f - \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e - \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C - D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h - i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 - \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 - \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 - \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 - \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Last code unit = ' ' Subject length lower bound = 3 \x{a2} \x{84} @@ -890,31 +874,31 @@ No match /\x{1234}+/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: \xe1 +Starting code units: \xe1 Subject length lower bound = 1 /\x{1234}+?/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: \xe1 +Starting code units: \xe1 Subject length lower bound = 1 /\x{1234}++/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: \xe1 +Starting code units: \xe1 Subject length lower bound = 1 /\x{1234}{2}/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: \xe1 +Starting code units: \xe1 Subject length lower bound = 2 /[^\x{c4}]/IB,utf ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -939,7 +923,7 @@ Subject length lower bound = 2 /\R/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 +Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 Subject length lower bound = 1 /\777/IB,utf @@ -1093,7 +1077,7 @@ Subject length lower bound = 1 /A/utf \x{ff000041} -** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8 +** Character \N{U+ff000041} is greater than 0x7fffffff and therefore cannot be encoded as UTF-8 \x{7f000041} Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 @@ -1157,7 +1141,7 @@ Subject length 
lower bound = 3 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: \xd0 \xd1 +Starting code units: \xd0 \xd1 Subject length lower bound = 17 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} @@ -1175,24 +1159,24 @@ Subject length lower bound = 17 /[^â±¥]/Bi,utf ------------------------------------------------------------------ Bra - /i [^\x{2c65}] + /i [^\x{2c65}] (not) Ket End ------------------------------------------------------------------ /\h/I Capture group count = 0 -Starting code units: \x09 \x20 \xa0 +Starting code units: \x09 \x20 \xa0 Subject length lower bound = 1 /\v/I Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d \x85 +Starting code units: \x0a \x0b \x0c \x0d \x85 Subject length lower bound = 1 /\R/I Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d \x85 +Starting code units: \x0a \x0b \x0c \x0d \x85 Subject length lower bound = 1 /[[:blank:]]/B,ucp @@ -1206,7 +1190,7 @@ Subject length lower bound = 1 /\x{212a}+/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: K k \xe2 +Starting code units: K k \xe2 Subject length lower bound = 1 KKkk\x{212a} 0: KKkk\x{212a} @@ -1214,7 +1198,7 @@ Subject length lower bound = 1 /s+/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: S s \xc5 +Starting code units: S s \xc5 Subject length lower bound = 1 SSss\x{17f} 0: SSss\x{17f} @@ -1229,7 +1213,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: A \xc4 +Starting code units: A \xc4 Last code unit = 'A' Subject length lower bound = 1 A @@ -1246,7 
+1230,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 Subject length lower bound = 1 /[Z\x{100}]/IB,utf @@ -1258,7 +1242,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: Z \xc4 +Starting code units: Z \xc4 Subject length lower bound = 1 Z\x{100} 0: Z @@ -1276,7 +1260,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 +Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 Subject length lower bound = 1 /[z\Qa-d]Ā\E]/IB,utf @@ -1288,7 +1272,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: - ] a d z \xc4 +Starting code units: - ] a d z \xc4 Subject length lower bound = 1 \x{100} 0: \x{100} @@ -1309,7 +1293,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a b \xc4 +Starting code units: a b \xc4 Last code unit = 'z' Subject length lower bound = 7 @@ -1323,7 +1307,7 @@ Subject length lower bound = 7 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4 Subject length lower bound = 1 /\x{100}*\d/IB,utf @@ -1336,7 +1320,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 Subject 
length lower bound = 1 /\x{100}*\w/IB,utf @@ -1349,9 +1333,9 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z - \xc4 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xc4 Subject length lower bound = 1 /\x{100}*\D/IB,utf @@ -1364,16 +1348,16 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c - d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 - \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 - \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 - \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef - \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe - \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff Subject length lower bound = 1 /\x{100}*\S/IB,utf @@ -1386,15 +1370,15 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f - \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e - \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C - D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h - i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 - \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 - \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 - \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 - \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /\x{100}*\W/IB,utf @@ -1407,14 +1391,14 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 - \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 - \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 - \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 - \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 + \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 + \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 + \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 + \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\x{105}-\x{109}]/IBi,utf @@ -1426,7 +1410,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: \xc4 +Starting code units: \xc4 Subject length lower bound = 1 \x{104} 0: \x{104} @@ -1443,13 +1427,13 @@ No match /[z-\x{100}]/IBi,utf ------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 +Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 Subject length lower bound = 1 Z 0: Z @@ -1480,13 +1464,13 @@ No match /[z-\x{100}]/IBi,utf ------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 +Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 Subject length lower bound = 1 /\x{3a3}B/IBi,utf @@ -1499,7 +1483,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf 
-Starting code units: \xce \xcf +Starting code units: \xce \xcf Last code unit = 'B' (caseless) Subject length lower bound = 2 @@ -1519,7 +1503,7 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 /[\W\p{Any}]/B ------------------------------------------------------------------ Bra - [\x00-/:-@[-^`{-\xff\p{Any}] + AllAny Ket End ------------------------------------------------------------------ @@ -1531,7 +1515,7 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 /[\W\pL]/B ------------------------------------------------------------------ Bra - [\x00-/:-@[-^`{-\xff\p{L}] + [\x00-/:-^`-\xff\p{L}] Ket End ------------------------------------------------------------------ @@ -1547,7 +1531,7 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), /[\s[:^ascii:]]/B,ucp ------------------------------------------------------------------ Bra - [\x80-\xff\p{Xsp}] + [\x09-\x0d \x80-\xff\p{Xsp}] Ket End ------------------------------------------------------------------ @@ -1575,23 +1559,23 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Bra ^ - [\x00-`c-\xff] (neg) + [^ab] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: utf Overall options: anchored utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 - \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf - \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee - \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd - \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 + \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf + \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee + \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd + \xfe \xff Subject length lower bound = 1 c 0: c @@ -1775,7 +1759,7 @@ Subject length lower bound = 1 /[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xc3 +Starting code units: \xc3 Last code unit = 'X' Subject length lower bound = 3 @@ -1788,31 +1772,31 @@ Subject length lower bound = 3 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: , \xf3 +Starting code units: , \xf3 Subject length lower bound = 1 /[\x{fff4}-\x{ffff8}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xef \xf0 \xf1 \xf2 \xf3 +Starting code units: \xef \xf0 \xf1 \xf2 \xf3 Subject length lower bound = 1 /[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xef \xf0 \xf1 \xf2 \xf4 +Starting 
code units: \xef \xf0 \xf1 \xf2 \xf4 Subject length lower bound = 1 /[\xff\x{ffff}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xc3 \xef +Starting code units: \xc3 \xef Subject length lower bound = 1 /[\xff\x{ff}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xc3 +Starting code units: \xc3 Subject length lower bound = 1 abc\x{ff}def 0: \x{ff} @@ -1832,13 +1816,13 @@ Subject length lower bound = 1 /[Ss]/I,utf Capture group count = 0 Options: utf -Starting code units: S s +Starting code units: S s Subject length lower bound = 1 /(?:\x{ff}|\x{3000})/I,utf Capture group count = 0 Options: utf -Starting code units: \xc3 \xe3 +Starting code units: \xc3 \xe3 Subject length lower bound = 1 /x/utf @@ -1872,7 +1856,7 @@ Subject length lower bound = 2 /a|\x{c1}/iI,ucp Capture group count = 0 Options: caseless ucp -Starting code units: A a \xc1 \xe1 +Starting code units: A a \xc1 \xe1 Subject length lower bound = 1 \x{e1}xxx 0: \xe1 @@ -1880,7 +1864,7 @@ Subject length lower bound = 1 /a|\x{c1}/iI,utf Capture group count = 0 Options: caseless utf -Starting code units: A a \xc3 +Starting code units: A a \xc3 Subject length lower bound = 1 \x{e1}xxx 0: \x{e1} @@ -1925,4 +1909,109 @@ No match A\x80\x42\n No match +/ab$/match_invalid_utf +\= Expect no match + ab\x80cde +No match + +/ab\z/match_invalid_utf +\= Expect no match + ab\x80cde +No match + +/ab\Z/match_invalid_utf +\= Expect no match + ab\x80cde +No match + +/(..)(*scs:(1)ab\z)/match_invalid_utf + ab\x80cde + 0: ab + 1: ab + +/(..)(*scs:(1)ab\Z)/match_invalid_utf + ab\x80cde + 0: ab + 1: ab + +/(..)(*scs:(1)ab$)/match_invalid_utf + ab\x80cde + 0: ab + 1: ab + +/(.) \1/i,ucp + i I + 0: i I + 1: i + +/(.) 
\1/i,ucp,turkish_casing +Failed: error 205 at offset 0: PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode + +/[\x60-\x7f]/i,ucp,turkish_casing +Failed: error 205 at offset 0: PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode + i +\= Expect no match + I + +/[\x60-\xc0]/i,ucp,turkish_casing +Failed: error 205 at offset 0: PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode + i +\= Expect no match + I + +/[\x80-\xc0]/i,ucp,turkish_casing +Failed: error 205 at offset 0: PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode +\= Expect no match + i + I + +# python_octal + +/\400/ +Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode + +/abc/substitute_extended + abc\=replace=\400 +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/\400/python_octal +Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL) + +/abc/substitute_extended,python_octal + abc\=replace=\400 +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/\400/utf + +/abc/utf,substitute_extended + abc\=replace=\400 + 1: \x{100} + +/\400/utf,python_octal +Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL) + +/abc/utf,substitute_extended,python_octal + abc\=replace=\400 +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/[\x00-\x2f\x11-\xff]+/B +------------------------------------------------------------------ + Bra + AllAny++ + Ket + End +------------------------------------------------------------------ + abcd + 0: abcd + +/[\x00-\x2f\x11-\xff]{4,}/B,utf +------------------------------------------------------------------ + Bra + [\x00-\xff]{4,}+ + Ket + End +------------------------------------------------------------------ + abcd + 0: abcd + # End of testinput10 diff --git a/testdata/testoutput11-16 
b/testdata/testoutput11-16 index 8768785..8b9cd01 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -9,7 +9,7 @@ /[^\x{c4}]/IB ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -218,9 +218,9 @@ Subject length lower bound = 1 Capture group count = 0 Contains explicit CR or LF match Options: extended -Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 - 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e - f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff +Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 + 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e + f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff Subject length lower bound = 3 /[\h]/B @@ -261,7 +261,7 @@ Subject length lower bound = 3 /\h+/I Capture group count = 0 -Starting code units: \x09 \x20 \xa0 \xff +Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} @@ -276,7 +276,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x09 \x20 \xa0 \xff +Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} @@ -307,7 +307,7 @@ Subject length lower bound = 1 /\v+/I Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} @@ -322,7 +322,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d 
\x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} @@ -346,7 +346,7 @@ Subject length lower bound = 1 /\R+/I,bsr=unicode Capture group count = 0 \R matches any Unicode newline -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} @@ -364,11 +364,11 @@ Subject length lower bound = 6 /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B ------------------------------------------------------------------ Bra - [^\x{80}] - [^\x{ff}] - [^\x{100}] - [^\x{1000}] - [^\x{ffff}] + [^\x{80}] (not) + [^\x{ff}] (not) + [^\x{100}] (not) + [^\x{1000}] (not) + [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ @@ -376,11 +376,11 @@ Subject length lower bound = 6 /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi ------------------------------------------------------------------ Bra - /i [^\x{80}] - /i [^\x{ff}] - /i [^\x{100}] - /i [^\x{1000}] - /i [^\x{ffff}] + /i [^\x{80}] (not) + /i [^\x{ff}] (not) + /i [^\x{100}] (not) + /i [^\x{1000}] (not) + /i [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ @@ -388,15 +388,15 @@ Subject length lower bound = 6 /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B ------------------------------------------------------------------ Bra - [^\x{100}]* - [^\x{1000}]+ - [^\x{ffff}]?? - [^\x{8000}]{4} - [^\x{8000}]* - [^\x{7fff}]{2} - [^\x{7fff}]{0,7}? - [^\x{100}]{5} - [^\x{100}]?+ + [^\x{100}]* (not) + [^\x{1000}]+ (not) + [^\x{ffff}]?? (not) + [^\x{8000}]{4} (not) + [^\x{8000}]* (not) + [^\x{7fff}]{2} (not) + [^\x{7fff}]{0,7}? 
(not) + [^\x{100}]{5} (not) + [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ @@ -404,15 +404,15 @@ Subject length lower bound = 6 /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi ------------------------------------------------------------------ Bra - /i [^\x{100}]* - /i [^\x{1000}]+ - /i [^\x{ffff}]?? - /i [^\x{8000}]{4} - /i [^\x{8000}]* - /i [^\x{7fff}]{2} - /i [^\x{7fff}]{0,7}? - /i [^\x{100}]{5} - /i [^\x{100}]?+ + /i [^\x{100}]* (not) + /i [^\x{1000}]+ (not) + /i [^\x{ffff}]?? (not) + /i [^\x{8000}]{4} (not) + /i [^\x{8000}]* (not) + /i [^\x{7fff}]{2} (not) + /i [^\x{7fff}]{0,7}? (not) + /i [^\x{100}]{5} (not) + /i [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ @@ -474,8 +474,8 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB /[^\x00-a]{12,}[^b-\xff]*/B ------------------------------------------------------------------ Bra - [b-\xff] (neg){12,} - [\x00-a] (neg)*+ + [^\x00-a]{12,} + [^b-\xff]*+ Ket End ------------------------------------------------------------------ @@ -483,16 +483,16 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B ------------------------------------------------------------------ Bra - [\x00-\x08\x0e-\x1f!-\xff] (neg)* + [^\x09-\x0d ]* \s* [0-9A-Z_a-z]++ \W+ - [\x00-/:-\xff] (neg)*? + [^0-9]*? \d 0 - [\x00-/:-@[-^`{-\xff] (neg){4,6}? + [^0-9A-Z_a-z]{4,6}? \w* A Ket @@ -601,20 +601,20 @@ Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b - \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a - \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . 
/ 0 1 2 3 4 5 6 7 8 9 - : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ - _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 - \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f - \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e - \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae - \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd - \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc - \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb - \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea - \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 - \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\V]/IB @@ -625,20 +625,20 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e - \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d - \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > - ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c - d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 - \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 - \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 - \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 - \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf - \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce - \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd - \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec - \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb - \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e + \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d + \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 + \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 + \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 + \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf + \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff Subject length lower bound = 1 /(*THEN:\[A]{65501})/expand @@ -646,10 +646,34 @@ Subject length lower bound = 1 # We can use pcre2test's utf8_input modifier to create wide pattern characters, # even though this test is run when UTF is not supported. 
+/a\x{d800}b/utf8_input + aí €b + 0: a\x{d800}b + a\x{d800}b + 0: a\x{d800}b + a\o{154000}b + 0: a\x{d800}b +\= Expect warning unless 32bit + a\N{U+d800}b +** Warning: character \N{U+d800} is a surrogate and should not be encoded as UTF-16 + 0: a\x{d800}b + +/a\x{ffff}b/utf8_input + aï¿¿b + 0: a\x{ffff}b + a\x{ffff}b + 0: a\x{ffff}b + a\o{177777}b + 0: a\x{ffff}b + a\N{U+ffff}b + 0: a\x{ffff}b + /abý¿¿¿¿¿z/utf8_input ** Failed: character value greater than 0xffff cannot be converted to 16-bit in non-UTF mode abý¿¿¿¿¿z ab\x{7fffffff}z + ab\o{17777777777}z + ab\N{U+7fffffff}z /abÿý¿¿¿¿¿z/utf8_input ** Failed: invalid UTF-8 string cannot be converted to 16-bit string @@ -660,9 +684,170 @@ Subject length lower bound = 1 ** Failed: invalid UTF-8 string cannot be converted to 16-bit string abÿAz ab\x{80000041}z +\= Expect no match + abAz + aAz + ab\377Az + ab\xff\N{U+0041}z + ab\N{U+ff}\N{U+41}z + +/ab\x{80000041}z/ +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + ab\x{80000041}z /(?i:A{1,}\6666666666)/ A\x{1b6}6666666 0: A\x{1b6}6666666 +/abc/substitute_extended,replace=>\777< + abc + 1: >\x{1ff}< + +/abc/substitute_extended,replace=>\o{012345}< + abc + 1: >\x{14e5}< + +# Character range merging tests + +/[\x{100}-\x{200}\H\x{8000}-\x{9000}]/B +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[\x{100}-\x{200}\V\x{8000}-\x{9000}]/B +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B 
+------------------------------------------------------------------ + Bra + AllAny + # + AllAny{5} + AllAny{0,2}? + Ket + End +------------------------------------------------------------------ + +/[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B +Failed: error 134 at offset 34: character code point value in \x{} or \o{} is too large + +/[\x00-\x2f\x11-\xff]*?!/B +------------------------------------------------------------------ + Bra + [\x00-\xff]*? + ! + Ket + End +------------------------------------------------------------------ + abcd!e + 0: abcd! + +/i/turkish_casing +Failed: error 204 at offset 0: PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode + +# Character list tests + +/([\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}?).#/B +Failed: error 134 at offset 66: character code point value in \x{} or \o{} is too large + \x{9001}\x{9007}\x{8000}\x{ffff}\x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}# + +/([\x{3000}\x{3001}\x{3003}\x{3004}\x{3006}\x{3007}\x{8000}-\x{ffff}\x{100001}\x{100002}\x{100004}\x{100005}\x{100007}\x{100008}\x{10000a}\x{10000b}\x{80000000}-\x{ffffffff}]{5,}).#/B +Failed: error 134 at offset 76: character code point value in \x{} or \o{} is too large + \x{2fff}\x{3002}\x{7fff}\x{100000}\x{7fffffff}\x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}# + +/([^\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}]+).#/B +Failed: error 134 at offset 124: character code point value in \x{} or \o{} is too large + \x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}# + 
\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}# + \x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}# + \x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}\x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}# + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +# META_BIGVALUE tests + +/\x{80000000}/B +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + \x{80000000} +\= Expect no match + \x{7fffffff} + \x{80000001} + +/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + \x{80000002} + \x{8fffffff} +\= Expect no match + \x{7fffffff} + \x{90000000} + +/\x{80000000}/B,alt_extended_class +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + \x{80000000} +\= Expect no match + \x{7fffffff} + \x{80000001} + +/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B,alt_extended_class +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + \x{80000002} + \x{8fffffff} +\= Expect no match + \x{7fffffff} + \x{90000000} + +/[\x{80000000}-\x{8000000f}--\x{80000002}]/B,alt_extended_class +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +/[[\x{80000000}-\x{8000000f}]--[\x{80000002}]]/B,alt_extended_class +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +# META_BIGVALUE tests + +/(?[[\x{80000000}-\x{8000000f}]+\x{8fffffff}])/B +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large + 
\x{80000002} + \x{8fffffff} +\= Expect no match + \x{7fffffff} + \x{90000000} + +/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large + \x{80000001} + \x{80000003} +\= Expect no match + \x{80000002} + +# -------------- + # End of testinput11 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index 2c95f61..9b71cc6 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -9,7 +9,7 @@ /[^\x{c4}]/IB ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -218,9 +218,9 @@ Subject length lower bound = 1 Capture group count = 0 Contains explicit CR or LF match Options: extended -Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 - 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e - f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff +Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 + 9 = ? 
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e + f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff Subject length lower bound = 3 /[\h]/B @@ -261,7 +261,7 @@ Subject length lower bound = 3 /\h+/I Capture group count = 0 -Starting code units: \x09 \x20 \xa0 \xff +Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} @@ -276,7 +276,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x09 \x20 \xa0 \xff +Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} @@ -307,7 +307,7 @@ Subject length lower bound = 1 /\v+/I Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} @@ -322,7 +322,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} @@ -346,7 +346,7 @@ Subject length lower bound = 1 /\R+/I,bsr=unicode Capture group count = 0 \R matches any Unicode newline -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} @@ -364,11 +364,11 @@ Subject length lower bound = 6 /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B ------------------------------------------------------------------ Bra - [^\x{80}] - [^\x{ff}] - [^\x{100}] - [^\x{1000}] - [^\x{ffff}] + [^\x{80}] (not) + [^\x{ff}] (not) + 
[^\x{100}] (not) + [^\x{1000}] (not) + [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ @@ -376,11 +376,11 @@ Subject length lower bound = 6 /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi ------------------------------------------------------------------ Bra - /i [^\x{80}] - /i [^\x{ff}] - /i [^\x{100}] - /i [^\x{1000}] - /i [^\x{ffff}] + /i [^\x{80}] (not) + /i [^\x{ff}] (not) + /i [^\x{100}] (not) + /i [^\x{1000}] (not) + /i [^\x{ffff}] (not) Ket End ------------------------------------------------------------------ @@ -388,15 +388,15 @@ Subject length lower bound = 6 /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B ------------------------------------------------------------------ Bra - [^\x{100}]* - [^\x{1000}]+ - [^\x{ffff}]?? - [^\x{8000}]{4} - [^\x{8000}]* - [^\x{7fff}]{2} - [^\x{7fff}]{0,7}? - [^\x{100}]{5} - [^\x{100}]?+ + [^\x{100}]* (not) + [^\x{1000}]+ (not) + [^\x{ffff}]?? (not) + [^\x{8000}]{4} (not) + [^\x{8000}]* (not) + [^\x{7fff}]{2} (not) + [^\x{7fff}]{0,7}? (not) + [^\x{100}]{5} (not) + [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ @@ -404,15 +404,15 @@ Subject length lower bound = 6 /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi ------------------------------------------------------------------ Bra - /i [^\x{100}]* - /i [^\x{1000}]+ - /i [^\x{ffff}]?? - /i [^\x{8000}]{4} - /i [^\x{8000}]* - /i [^\x{7fff}]{2} - /i [^\x{7fff}]{0,7}? - /i [^\x{100}]{5} - /i [^\x{100}]?+ + /i [^\x{100}]* (not) + /i [^\x{1000}]+ (not) + /i [^\x{ffff}]?? (not) + /i [^\x{8000}]{4} (not) + /i [^\x{8000}]* (not) + /i [^\x{7fff}]{2} (not) + /i [^\x{7fff}]{0,7}? 
(not) + /i [^\x{100}]{5} (not) + /i [^\x{100}]?+ (not) Ket End ------------------------------------------------------------------ @@ -474,8 +474,8 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB /[^\x00-a]{12,}[^b-\xff]*/B ------------------------------------------------------------------ Bra - [b-\xff] (neg){12,} - [\x00-a] (neg)*+ + [^\x00-a]{12,} + [^b-\xff]*+ Ket End ------------------------------------------------------------------ @@ -483,16 +483,16 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B ------------------------------------------------------------------ Bra - [\x00-\x08\x0e-\x1f!-\xff] (neg)* + [^\x09-\x0d ]* \s* [0-9A-Z_a-z]++ \W+ - [\x00-/:-\xff] (neg)*? + [^0-9]*? \d 0 - [\x00-/:-@[-^`{-\xff] (neg){4,6}? + [^0-9A-Z_a-z]{4,6}? \w* A Ket @@ -604,20 +604,20 @@ Subject length lower bound = 2 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b - \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a - \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 - : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ - _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 - \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f - \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e - \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae - \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd - \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc - \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb - \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea - \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 - \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\V]/IB @@ -628,20 +628,20 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e - \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d - \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > - ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c - d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 - \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 - \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 - \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 - \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf - \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce - \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd - \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec - \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb - \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e + \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d + \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 + \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 + \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 + \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf + \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff Subject length lower bound = 1 /(*THEN:\[A]{65501})/expand @@ -649,10 +649,36 @@ Subject length lower bound = 1 # We can use pcre2test's utf8_input modifier to create wide pattern characters, # even though this test is run when UTF is not supported. 
+/a\x{d800}b/utf8_input + aí €b + 0: a\x{d800}b + a\x{d800}b + 0: a\x{d800}b + a\o{154000}b + 0: a\x{d800}b +\= Expect warning unless 32bit + a\N{U+d800}b + 0: a\x{d800}b + +/a\x{ffff}b/utf8_input + aï¿¿b + 0: a\x{ffff}b + a\x{ffff}b + 0: a\x{ffff}b + a\o{177777}b + 0: a\x{ffff}b + a\N{U+ffff}b + 0: a\x{ffff}b + /abý¿¿¿¿¿z/utf8_input abý¿¿¿¿¿z 0: ab\x{7fffffff}z ab\x{7fffffff}z + 0: ab\x{7fffffff}z + ab\o{17777777777}z + 0: ab\x{7fffffff}z + ab\N{U+7fffffff}z +** Warning: character \N{U+7fffffff} is greater than 0x10ffff and should not be encoded as UTF-32 0: ab\x{7fffffff}z /abÿý¿¿¿¿¿z/utf8_input @@ -666,9 +692,321 @@ Subject length lower bound = 1 0: ab\x{80000041}z ab\x{80000041}z 0: ab\x{80000041}z +\= Expect no match + abAz +No match + aAz +No match + ab\377Az +No match + ab\xff\N{U+0041}z +No match + ab\N{U+ff}\N{U+41}z +No match + +/ab\x{80000041}z/ + ab\x{80000041}z + 0: ab\x{80000041}z /(?i:A{1,}\6666666666)/ A\x{1b6}6666666 0: A\x{1b6}6666666 +/abc/substitute_extended,replace=>\777< + abc + 1: >\x{1ff}< + +/abc/substitute_extended,replace=>\o{012345}< + abc + 1: >\x{14e5}< + +# Character range merging tests + +/[\x{100}-\x{200}\H\x{8000}-\x{9000}]/B +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + +/[\x{100}-\x{200}\V\x{8000}-\x{9000}]/B +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + +/[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B +------------------------------------------------------------------ + Bra + [\x00-\xff\x{100}-\x{ffff}] + # + [\x00-\xff\x{100}-\x{ffff}]{5,7}? 
+ Ket + End +------------------------------------------------------------------ + +/[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B +------------------------------------------------------------------ + Bra + AllAny + # + AllAny{5} + AllAny{0,2}? + Ket + End +------------------------------------------------------------------ + +/[\x00-\x2f\x11-\xff]*?!/B +------------------------------------------------------------------ + Bra + [\x00-\xff]*? + ! + Ket + End +------------------------------------------------------------------ + abcd!e + 0: abcd! + +/i/turkish_casing +Failed: error 204 at offset 0: PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode + +# Character list tests + +/([\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}?).#/B +------------------------------------------------------------------ + Bra + CBra 1 + [\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}? + Ket + Any + # + Ket + End +------------------------------------------------------------------ + \x{9001}\x{9007}\x{8000}\x{ffff}\x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}# + 0: \x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}# + 1: \x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000} + +/([\x{3000}\x{3001}\x{3003}\x{3004}\x{3006}\x{3007}\x{8000}-\x{ffff}\x{100001}\x{100002}\x{100004}\x{100005}\x{100007}\x{100008}\x{10000a}\x{10000b}\x{80000000}-\x{ffffffff}]{5,}).#/B +------------------------------------------------------------------ + Bra + CBra 1 + [\x{3000}-\x{3001}\x{3003}-\x{3004}\x{3006}-\x{3007}\x{8000}-\x{ffff}\x{100001}-\x{100002}\x{100004}-\x{100005}\x{100007}-\x{100008}\x{10000a}-\x{10000b}\x{80000000}-\x{ffffffff}]{5,} + Ket + Any + # + Ket + End +------------------------------------------------------------------ + \x{2fff}\x{3002}\x{7fff}\x{100000}\x{7fffffff}\x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}# + 0: 
\x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}# + 1: \x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff} + +/([^\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}]+).#/B +------------------------------------------------------------------ + Bra + CBra 1 + [^\x{4000}\x{4002}\x{4004}-\x{4005}\x{4007}\x{4009}-\x{400a}\x{f000}\x{f002}\x{f004}-\x{f005}\x{f007}\x{f009}-\x{f00a}\x{100000}\x{100002}\x{100004}-\x{100005}\x{100007}\x{100009}-\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}-\x{a0000005}\x{a0000007}\x{a0000009}-\x{a000000a}]+ + Ket + Any + # + Ket + End +------------------------------------------------------------------ + \x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}# + 0: \x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}# + 1: \x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b} + \x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}# + 0: \x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}# + 1: \x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b} + \x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}# + 0: \x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}# + 1: \x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b} + \x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}\x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}# + 0: \x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}# + 1: \x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b} + +# -------------- 
+ +# EXTENDED CHARACTER CLASSES (UTS#18) + +# META_BIGVALUE tests + +/\x{80000000}/B +------------------------------------------------------------------ + Bra + \x{80000000} + Ket + End +------------------------------------------------------------------ + \x{80000000} + 0: \x{80000000} +\= Expect no match + \x{7fffffff} +No match + \x{80000001} +No match + +/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B +------------------------------------------------------------------ + Bra + [\x{80000000}-\x{8000000f}\x{8fffffff}] + Ket + End +------------------------------------------------------------------ + \x{80000002} + 0: \x{80000002} + \x{8fffffff} + 0: \x{8fffffff} +\= Expect no match + \x{7fffffff} +No match + \x{90000000} +No match + +/\x{80000000}/B,alt_extended_class +------------------------------------------------------------------ + Bra + \x{80000000} + Ket + End +------------------------------------------------------------------ + \x{80000000} + 0: \x{80000000} +\= Expect no match + \x{7fffffff} +No match + \x{80000001} +No match + +/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\x{80000000}-\x{8000000f}\x{8fffffff}] + Ket + End +------------------------------------------------------------------ + \x{80000002} + 0: \x{80000002} + \x{8fffffff} + 0: \x{8fffffff} +\= Expect no match + \x{7fffffff} +No match + \x{90000000} +No match + +/[\x{80000000}-\x{8000000f}--\x{80000002}]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\x{80000000}-\x{8000000f}] + xclass: [^\x{80000002}] + AND + ] + Ket + End +------------------------------------------------------------------ + \x{80000001} + 0: \x{80000001} + \x{80000003} + 0: \x{80000003} +\= Expect no match + \x{80000002} +No match + +/[[\x{80000000}-\x{8000000f}]--[\x{80000002}]]/B,alt_extended_class 
+------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\x{80000000}-\x{8000000f}] + xclass: [^\x{80000002}] + AND + ] + Ket + End +------------------------------------------------------------------ + \x{80000001} + 0: \x{80000001} + \x{80000003} + 0: \x{80000003} +\= Expect no match + \x{80000002} +No match + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +# META_BIGVALUE tests + +/(?[[\x{80000000}-\x{8000000f}]+\x{8fffffff}])/B +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\x{80000000}-\x{8000000f}] + xclass: [\x{8fffffff}] + OR + ] + Ket + End +------------------------------------------------------------------ + \x{80000002} + 0: \x{80000002} + \x{8fffffff} + 0: \x{8fffffff} +\= Expect no match + \x{7fffffff} +No match + \x{90000000} +No match + +/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\x{80000000}-\x{8000000f}] + xclass: [^\x{80000002}] + AND + ] + Ket + End +------------------------------------------------------------------ + \x{80000001} + 0: \x{80000001} + \x{80000003} + 0: \x{80000003} +\= Expect no match + \x{80000002} +No match + +/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\x{80000000}-\x{8000000f}] + xclass: [^\x{80000002}] + AND + ] + Ket + End +------------------------------------------------------------------ + \x{80000001} + 0: \x{80000001} + \x{80000003} + 0: \x{80000003} +\= Expect no match + \x{80000002} +No match + +# -------------- + # End of testinput11 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 616d693..bd4d524 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -241,23 +241,23 @@ Subject length lower bound = 1 /[^ab\xC0-\xF0]/IB,utf 
------------------------------------------------------------------ Bra - [\x00-`c-\xbf\xf1-\xff] (neg) + [^ab\xc0-\xf0] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e - \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d - \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac - \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb - \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb - \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff Subject length lower bound = 1 \x{f1} 0: \x{f1} @@ -273,22 +273,6 @@ No match \x{f0} No match -/Ā{3,4}/IB,utf ------------------------------------------------------------------- - Bra - \x{100}{3} - \x{100}?+ - Ket - End ------------------------------------------------------------------- -Capture group count = 0 -Options: utf -First code unit = \x{100} -Last code unit = \x{100} -Subject length lower bound = 3 - \x{100}\x{100}\x{100}\x{100\x{100} - 0: \x{100}\x{100}\x{100} - /(\x{100}+|x)/IB,utf ------------------------------------------------------------------ Bra @@ -302,7 +286,7 @@ Subject length lower bound = 3 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: x \xff +Starting code units: x \xff Subject length lower bound = 1 /(\x{100}*a|x)/IB,utf @@ -319,7 +303,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a x \xff +Starting code units: a x \xff Subject length lower bound = 1 /(\x{100}{0,2}a|x)/IB,utf @@ -336,7 +320,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a x \xff +Starting code units: a x \xff Subject length lower bound = 1 /(\x{100}{1,2}a|x)/IB,utf @@ -354,7 +338,7 @@ Subject length lower bound = 1 
------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: x \xff +Starting code units: x \xff Subject length lower bound = 1 /\x{100}/IB,utf @@ -400,7 +384,7 @@ Subject length lower bound = 3 /[^\x{c4}]/IB ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -442,7 +426,7 @@ Subject length lower bound = 1 /[^\xff]/IB,utf ------------------------------------------------------------------ Bra - [^\x{ff}] + [^\x{ff}] (not) Ket End ------------------------------------------------------------------ @@ -531,7 +515,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 XX\x{dfff} Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 XX\x{110000} -** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 +** Failed: character \N{U+110000} is greater than 0x10ffff and therefore cannot be encoded as UTF-16 XX\x{d800}\x{1234} Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 \= Expect no match @@ -580,7 +564,7 @@ Failed: error 160 at offset 14: (*VERB) not recognized or malformed /\h/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x20 \xa0 \xff +Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 ABC\x{09} 0: \x{09} @@ -604,7 +588,7 @@ Subject length lower bound = 1 /\v/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 ABC\x{0a} 0: \x{0a} @@ -622,7 +606,7 @@ Subject length lower bound = 1 /\h*A/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x20 A \xa0 \xff +Starting code units: \x09 \x20 A \xa0 \xff Last code unit = 'A' Subject length lower bound = 1 CDBABC @@ -634,7 +618,7 @@ Subject length lower bound = 1 
Capture group count = 0 Options: utf \R matches any Unicode newline -Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff Last code unit = 'A' Subject length lower bound = 1 CDBABC @@ -645,21 +629,21 @@ Subject length lower bound = 1 /\v+A/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Last code unit = 'A' Subject length lower bound = 2 /\s?xxx\s/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x Last code unit = 'x' Subject length lower bound = 4 /\sxxx\s/I,utf,tables=2 Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 Last code unit = 'x' Subject length lower bound = 5 AB\x{85}xxx\x{a0}XYZ @@ -670,20 +654,20 @@ Subject length lower bound = 5 /\S \S/I,utf,tables=2 Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f - \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e - \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C - D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h - i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 - \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 - \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 - \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 - \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 - \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 - \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 - \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef - \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe - \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 + \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff Last code unit = ' ' Subject length lower bound = 3 \x{a2} \x{84} @@ -735,7 +719,7 @@ Subject length lower bound = 2 /[^\x{c4}]/IB,utf ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -760,7 +744,7 @@ Subject length lower bound = 2 /\R/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 # Check bad offset @@ -1003,7 +987,7 @@ Subject length lower bound = 17 /[^â±¥]/Bi,utf ------------------------------------------------------------------ Bra - /i [^\x{2c65}] + /i [^\x{2c65}] (not) Ket End ------------------------------------------------------------------ @@ -1019,7 +1003,7 @@ Subject length lower bound = 17 /\x{212a}+/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: K k \xff +Starting code units: K k \xff Subject length lower bound = 1 KKkk\x{212a} 0: KKkk\x{212a} @@ -1027,7 +1011,7 @@ Subject length lower bound = 1 /s+/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: S s \xff +Starting code 
units: S s \xff Subject length lower bound = 1 SSss\x{17f} 0: SSss\x{17f} @@ -1050,7 +1034,7 @@ Failed: error 134 at offset 10: character code point value in \x{} or \o{} is to ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: A \xff +Starting code units: A \xff Last code unit = 'A' Subject length lower bound = 1 A @@ -1067,7 +1051,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff Subject length lower bound = 1 /[Z\x{100}]/IB,utf @@ -1079,7 +1063,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: Z \xff +Starting code units: Z \xff Subject length lower bound = 1 Z\x{100} 0: Z @@ -1097,15 +1081,15 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 - \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 - \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 - \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 - \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 - \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 - \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 - \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 - \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 + \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 + \x97 \x98 \x99 \x9a \x9b \x9c \x9d 
\x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 + \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 + \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 + \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 + \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 + \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 + \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[z\Qa-d]Ā\E]/IB,utf @@ -1117,7 +1101,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: - ] a d z \xff +Starting code units: - ] a d z \xff Subject length lower bound = 1 \x{100} 0: \x{100} @@ -1138,7 +1122,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a b \xff +Starting code units: a b \xff Last code unit = 'z' Subject length lower bound = 7 @@ -1152,7 +1136,7 @@ Subject length lower bound = 7 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff Subject length lower bound = 1 /\x{100}*\d/IB,utf @@ -1165,7 +1149,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff Subject length lower bound = 1 /\x{100}*\w/IB,utf @@ -1178,9 +1162,9 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h 
i j k l m n o p q r s t u v w x y z - \xff +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xff Subject length lower bound = 1 /\x{100}*\D/IB,utf @@ -1193,20 +1177,20 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c - d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 - \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 - \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 - \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf - \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe - \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd - \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc - \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb - \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa - \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /\x{100}*\S/IB,utf @@ -1219,20 +1203,20 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f - \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e - \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C - D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h - i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 - \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 - \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 - \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 - \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 - \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf - \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde - \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed - \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc - \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff Subject length lower bound = 1 /\x{100}*\W/IB,utf @@ -1245,18 +1229,18 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? 
@ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 - \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 - \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 - \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 - \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 - \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 - \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 - \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 - \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 + \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 + \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 + \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 + \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 + \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 + \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 + \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 + \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\x{105}-\x{109}]/IBi,utf @@ -1268,7 +1252,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 \x{104} 0: \x{104} @@ -1285,22 +1269,22 @@ No match /[z-\x{100}]/IBi,utf 
------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 - \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 - \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 - \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 - \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 - \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 - \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 - \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef - \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe - \xff +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff Subject length lower bound = 1 Z 0: Z @@ -1331,22 +1315,22 @@ No match /[z-\x{100}]/IBi,utf ------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + 
[Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 - \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 - \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 - \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 - \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 - \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 - \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 - \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef - \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe - \xff +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff Subject length lower bound = 1 /\x{3a3}B/IBi,utf @@ -1359,13 +1343,13 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: \xff +Starting code units: \xff Last code unit = 'B' (caseless) Subject length lower bound = 2 /./utf \x{110000} -** Failed: character \x{110000} is greater than 0x10ffff and so 
cannot be converted to UTF-16 +** Failed: character \N{U+110000} is greater than 0x10ffff and therefore cannot be encoded as UTF-16 /(*UTF)abý¿¿¿¿¿z/B ------------------------------------------------------------------ @@ -1381,7 +1365,7 @@ Subject length lower bound = 2 /[\W\p{Any}]/B ------------------------------------------------------------------ Bra - [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}] + AllAny Ket End ------------------------------------------------------------------ @@ -1393,7 +1377,7 @@ Subject length lower bound = 2 /[\W\pL]/B ------------------------------------------------------------------ Bra - [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}] + [^0-9_] Ket End ------------------------------------------------------------------ @@ -1410,7 +1394,7 @@ No match /[\s[:^ascii:]]/B,ucp ------------------------------------------------------------------ Bra - [\x80-\xff\p{Xsp}\x{100}-\x{ffff}] + [^\x00-\x08\x0e-\x1f!-\x7f] Ket End ------------------------------------------------------------------ @@ -1439,27 +1423,27 @@ Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowe ------------------------------------------------------------------ Bra ^ - [\x00-`c-\xff] (neg) + [^ab] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: utf Overall options: anchored utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e - \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d - \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac - \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb - \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca - \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 - \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 - \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 - \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 c 0: c @@ -1597,7 +1581,7 @@ No match A\x{d800}B No match A\x{110000}B -** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 +** Failed: character \N{U+110000} is greater than 0x10ffff and therefore cannot be encoded as UTF-16 /aa/utf,ucp,match_invalid_utf,global aa\x{d800}aa @@ -1612,6 +1596,36 @@ No match A\x{df00}\n No match +/ab$/match_invalid_utf +\= Expect no match + ab\x{df00}cde +No match + +/ab\z/match_invalid_utf +\= Expect no match + ab\x{df00}cde +No match + +/ab\Z/match_invalid_utf +\= Expect no match + ab\x{df00}cde +No match + +/(..)(*scs:(1)ab\z)/match_invalid_utf + ab\x{df00}cde + 0: ab + 1: ab + +/(..)(*scs:(1)ab\Z)/match_invalid_utf + ab\x{df00}cde + 0: ab + 1: ab + +/(..)(*scs:(1)ab$)/match_invalid_utf + ab\x{df00}cde + 0: ab + 1: ab + # ---------------------------------------------------- /(*UTF)(?=\x{123})/I @@ -1632,18 +1646,18 @@ Subject length lower bound = 3 /[\xff\x{ffff}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 /[\xff\x{ff}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xff +Starting code 
units: \xff Subject length lower bound = 1 /[\xff\x{ff}]/I Capture group count = 0 -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 /[Ss]/I @@ -1654,25 +1668,25 @@ Subject length lower bound = 1 /[Ss]/I,utf Capture group count = 0 Options: utf -Starting code units: S s +Starting code units: S s Subject length lower bound = 1 /(?:\x{ff}|\x{3000})/I,utf Capture group count = 0 Options: utf -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 # ---------------------------------------------------- # UCP and casing tests -/\x{120}/i,I +/\x{120}/iI Capture group count = 0 Options: caseless First code unit = \x{120} Subject length lower bound = 1 -/\x{c1}/i,I,ucp +/\x{c1}/iI,ucp Capture group count = 0 Options: caseless ucp First code unit = \xc1 (caseless) @@ -1768,7 +1782,7 @@ Subject length lower bound = 2 /a|\x{c1}/iI,ucp Capture group count = 0 Options: caseless ucp -Starting code units: A a \xc1 \xe1 +Starting code units: A a \xc1 \xe1 Subject length lower bound = 1 \x{e1}xxx 0: \xe1 @@ -1805,8 +1819,141 @@ No match \x{17f} No match +/(.) \1/i,ucp + i I + 0: i I + 1: i + +/(.) \1/i,ucp,turkish_casing +\= Expect no match + i I +No match + +/(.) \1/i,ucp + i I + 0: i I + 1: i + \x{212a} k + 0: \x{212a} k + 1: \x{212a} +\= Expect no match + i \x{0130} +No match + \x{0131} I +No match + +/(.) \1/i,ucp,turkish_casing + \x{212a} k + 0: \x{212a} k + 1: \x{212a} + i \x{0130} + 0: i \x{130} + 1: i + \x{0131} I + 0: \x{131} I + 1: \x{131} +\= Expect no match + i I +No match + +/(.) 
(?r:\1)/i,ucp,turkish_casing + i I + 0: i I + 1: i +\= Expect no match + i \x{0130} +No match + \x{0131} I +No match + \x{212a} k +No match + +/[a-z][^i]I/ucp,turkish_casing + bII + 0: bII + b\x{0130}I + 0: b\x{130}I + b\x{0131}I + 0: b\x{131}I +\= Expect no match + biI +No match + +/[a-z][^i]I/i,ucp,turkish_casing + b\x{0131}I + 0: b\x{131}I + bII + 0: bII +\= Expect no match + biI +No match + b\x{0130}I +No match + +/[a-z](?r:[^i])I/i,ucp,turkish_casing + b\x{0131}I + 0: b\x{131}I + b\x{0130}I + 0: b\x{130}I +\= Expect no match + bII +No match + biI +No match + +/b(?r:[\x{00FF}-\x{FFEE}])/i,ucp,turkish_casing + b\x{0130} + 0: b\x{130} + b\x{0131} + 0: b\x{131} + B\x{212a} + 0: B\x{212a} +\= Expect no match + bi +No match + bI +No match + bk +No match + +/[\x60-\x7f]/i,ucp,turkish_casing + i + 0: i +\= Expect no match + I +No match + +/[\x60-\xc0]/i,ucp,turkish_casing + i + 0: i +\= Expect no match + I +No match + +/[\x80-\xc0]/i,ucp,turkish_casing +\= Expect no match + i +No match + I +No match + # ---------------------------------------------------- +/b[\x{00FF}-\x{FFEE}]/ir + b\x{0130} + 0: b\x{130} + b\x{0131} + 0: b\x{131} + B\x{212a} + 0: B\x{212a} +\= Expect no match + bi +No match + bI +No match + bk +No match + # Quantifier after a literal that has the value of META_ACCEPT (not UTF). This # fails in 16-bit mode, but is OK for 32-bit. @@ -1863,6 +2010,31 @@ Failed: error 134 at offset 12: character code point value in \x{} or \o{} is to ** Truncation will probably give the wrong result. 
No match +/[sk](?r:[sk])[sk]/Bi,ucp +------------------------------------------------------------------ + Bra + [KSks\x{17f}\x{212a}] + Bra + [KSks] + Ket + [KSks\x{17f}\x{212a}] + Ket + End +------------------------------------------------------------------ + SKS + 0: SKS + sks + 0: sks + \x{212a}S\x{17f} + 0: \x{212a}S\x{17f} + \x{17f}K\x{212a} + 0: \x{17f}K\x{212a} +\= Expect no match + s\x{212a}s +No match + K\x{17f}K +No match + # --------------------------------------------------------- # End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 3c9586e..c9587ee 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -236,23 +236,23 @@ Subject length lower bound = 1 /[^ab\xC0-\xF0]/IB,utf ------------------------------------------------------------------ Bra - [\x00-`c-\xbf\xf1-\xff] (neg) + [^ab\xc0-\xf0] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e - \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d - \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac - \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb - \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb - \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . 
/ 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff Subject length lower bound = 1 \x{f1} 0: \x{f1} @@ -268,22 +268,6 @@ No match \x{f0} No match -/Ā{3,4}/IB,utf ------------------------------------------------------------------- - Bra - \x{100}{3} - \x{100}?+ - Ket - End ------------------------------------------------------------------- -Capture group count = 0 -Options: utf -First code unit = \x{100} -Last code unit = \x{100} -Subject length lower bound = 3 - \x{100}\x{100}\x{100}\x{100\x{100} - 0: \x{100}\x{100}\x{100} - /(\x{100}+|x)/IB,utf ------------------------------------------------------------------ Bra @@ -297,7 +281,7 @@ Subject length lower bound = 3 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: x \xff +Starting code units: x \xff Subject length lower bound = 1 /(\x{100}*a|x)/IB,utf @@ -314,7 +298,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a x \xff +Starting code units: a x \xff Subject length lower bound = 1 /(\x{100}{0,2}a|x)/IB,utf @@ -331,7 +315,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a x \xff +Starting code units: a x \xff Subject length lower bound = 1 /(\x{100}{1,2}a|x)/IB,utf @@ -349,7 +333,7 @@ Subject length lower 
bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: x \xff +Starting code units: x \xff Subject length lower bound = 1 /\x{100}/IB,utf @@ -395,7 +379,7 @@ Subject length lower bound = 3 /[^\x{c4}]/IB ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -437,7 +421,7 @@ Subject length lower bound = 1 /[^\xff]/IB,utf ------------------------------------------------------------------ Bra - [^\x{ff}] + [^\x{ff}] (not) Ket End ------------------------------------------------------------------ @@ -574,7 +558,7 @@ Subject length lower bound = 3 /\h/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x20 \xa0 \xff +Starting code units: \x09 \x20 \xa0 \xff Subject length lower bound = 1 ABC\x{09} 0: \x{09} @@ -598,7 +582,7 @@ Subject length lower bound = 1 /\v/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 ABC\x{0a} 0: \x{0a} @@ -616,7 +600,7 @@ Subject length lower bound = 1 /\h*A/I,utf Capture group count = 0 Options: utf -Starting code units: \x09 \x20 A \xa0 \xff +Starting code units: \x09 \x20 A \xa0 \xff Last code unit = 'A' Subject length lower bound = 1 CDBABC @@ -628,7 +612,7 @@ Subject length lower bound = 1 Capture group count = 0 Options: utf \R matches any Unicode newline -Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff Last code unit = 'A' Subject length lower bound = 1 CDBABC @@ -639,21 +623,21 @@ Subject length lower bound = 1 /\v+A/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Last code unit = 'A' Subject length lower bound = 2 /\s?xxx\s/I,utf 
Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x Last code unit = 'x' Subject length lower bound = 4 /\sxxx\s/I,utf,tables=2 Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 Last code unit = 'x' Subject length lower bound = 5 AB\x{85}xxx\x{a0}XYZ @@ -664,20 +648,20 @@ Subject length lower bound = 5 /\S \S/I,utf,tables=2 Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f - \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e - \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C - D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h - i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 - \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 - \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 - \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 - \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 - \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 - \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 - \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef - \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe - \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 + \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff Last code unit = ' ' Subject length lower bound = 3 \x{a2} \x{84} @@ -729,7 +713,7 @@ Subject length lower bound = 2 /[^\x{c4}]/IB,utf ------------------------------------------------------------------ Bra - [^\x{c4}] + [^\x{c4}] (not) Ket End ------------------------------------------------------------------ @@ -754,7 +738,7 @@ Subject length lower bound = 2 /\R/I,utf Capture group count = 0 Options: utf -Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff Subject length lower bound = 1 # Check bad offset @@ -997,7 +981,7 @@ Subject length lower bound = 17 /[^â±¥]/Bi,utf ------------------------------------------------------------------ Bra - /i [^\x{2c65}] + /i [^\x{2c65}] (not) Ket End ------------------------------------------------------------------ @@ -1013,7 +997,7 @@ Subject length lower bound = 17 /\x{212a}+/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: K k \xff +Starting code units: K k \xff Subject length lower bound = 1 KKkk\x{212a} 0: KKkk\x{212a} @@ -1021,7 +1005,7 @@ Subject length lower bound = 1 /s+/Ii,utf Capture group count = 0 Options: caseless utf -Starting code units: S s \xff +Starting code 
units: S s \xff Subject length lower bound = 1 SSss\x{17f} 0: SSss\x{17f} @@ -1044,7 +1028,7 @@ Failed: error 134 at offset 10: character code point value in \x{} or \o{} is to ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: A \xff +Starting code units: A \xff Last code unit = 'A' Subject length lower bound = 1 A @@ -1061,7 +1045,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff Subject length lower bound = 1 /[Z\x{100}]/IB,utf @@ -1073,7 +1057,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: Z \xff +Starting code units: Z \xff Subject length lower bound = 1 Z\x{100} 0: Z @@ -1091,15 +1075,15 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 - \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 - \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 - \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 - \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 - \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 - \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 - \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 - \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 + \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 + \x97 \x98 \x99 \x9a \x9b \x9c \x9d 
\x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 + \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 + \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 + \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 + \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 + \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 + \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[z\Qa-d]Ā\E]/IB,utf @@ -1111,7 +1095,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: - ] a d z \xff +Starting code units: - ] a d z \xff Subject length lower bound = 1 \x{100} 0: \x{100} @@ -1132,7 +1116,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 1 Options: utf -Starting code units: a b \xff +Starting code units: a b \xff Last code unit = 'z' Subject length lower bound = 7 @@ -1146,7 +1130,7 @@ Subject length lower bound = 7 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff Subject length lower bound = 1 /\x{100}*\d/IB,utf @@ -1159,7 +1143,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff Subject length lower bound = 1 /\x{100}*\w/IB,utf @@ -1172,9 +1156,9 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h 
i j k l m n o p q r s t u v w x y z - \xff +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xff Subject length lower bound = 1 /\x{100}*\D/IB,utf @@ -1187,20 +1171,20 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c - d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 - \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 - \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 - \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf - \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe - \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd - \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc - \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb - \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa - \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /\x{100}*\S/IB,utf @@ -1213,20 +1197,20 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f - \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e - \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C - D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h - i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 - \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 - \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 - \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 - \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 - \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf - \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde - \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed - \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc - \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff Subject length lower bound = 1 /\x{100}*\W/IB,utf @@ -1239,18 +1223,18 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? 
@ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 - \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 - \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 - \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 - \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 - \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 - \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 - \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 - \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 + \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 + \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 + \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 + \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 + \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 + \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 + \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 + \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\x{105}-\x{109}]/IBi,utf @@ -1262,7 +1246,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 \x{104} 0: \x{104} @@ -1279,22 +1263,22 @@ No match /[z-\x{100}]/IBi,utf 
------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 - \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 - \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 - \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 - \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 - \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 - \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 - \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef - \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe - \xff +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff Subject length lower bound = 1 Z 0: Z @@ -1325,22 +1309,22 @@ No match /[z-\x{100}]/IBi,utf ------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + 
[Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 - \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 - \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 - \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 - \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 - \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 - \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 - \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef - \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe - \xff +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff Subject length lower bound = 1 /\x{3a3}B/IBi,utf @@ -1353,7 +1337,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf -Starting code units: \xff +Starting code units: \xff Last code unit = 'B' (caseless) Subject length lower bound = 2 @@ -1375,7 +1359,7 @@ Failed: error -28: UTF-32 error: code points greater than 0x10ffff 
are not defin /[\W\p{Any}]/B ------------------------------------------------------------------ Bra - [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}] + AllAny Ket End ------------------------------------------------------------------ @@ -1387,7 +1371,7 @@ Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defin /[\W\pL]/B ------------------------------------------------------------------ Bra - [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}] + [^0-9_] Ket End ------------------------------------------------------------------ @@ -1404,7 +1388,7 @@ No match /[\s[:^ascii:]]/B,ucp ------------------------------------------------------------------ Bra - [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}] + [^\x00-\x08\x0e-\x1f!-\x7f] Ket End ------------------------------------------------------------------ @@ -1436,27 +1420,27 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Bra ^ - [\x00-`c-\xff] (neg) + [^ab] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: utf Overall options: anchored utf -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e - \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d - \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac - \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb - \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca - \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 - \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 - \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 - \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 c 0: c @@ -1610,6 +1594,36 @@ No match A\x{df00}\n No match +/ab$/match_invalid_utf +\= Expect no match + ab\x{df00}cde +No match + +/ab\z/match_invalid_utf +\= Expect no match + ab\x{df00}cde +No match + +/ab\Z/match_invalid_utf +\= Expect no match + ab\x{df00}cde +No match + +/(..)(*scs:(1)ab\z)/match_invalid_utf + ab\x{df00}cde + 0: ab + 1: ab + +/(..)(*scs:(1)ab\Z)/match_invalid_utf + ab\x{df00}cde + 0: ab + 1: ab + +/(..)(*scs:(1)ab$)/match_invalid_utf + ab\x{df00}cde + 0: ab + 1: ab + # ---------------------------------------------------- /(*UTF)(?=\x{123})/I @@ -1630,18 +1644,18 @@ Subject length lower bound = 3 /[\xff\x{ffff}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 /[\xff\x{ff}]/I,utf Capture group count = 0 Options: utf -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 /[\xff\x{ff}]/I Capture group count = 0 -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 /[Ss]/I @@ -1652,25 +1666,25 @@ Subject length lower bound = 1 /[Ss]/I,utf Capture group count = 0 Options: utf -Starting code units: S s 
+Starting code units: S s Subject length lower bound = 1 /(?:\x{ff}|\x{3000})/I,utf Capture group count = 0 Options: utf -Starting code units: \xff +Starting code units: \xff Subject length lower bound = 1 # ---------------------------------------------------- # UCP and casing tests -/\x{120}/i,I +/\x{120}/iI Capture group count = 0 Options: caseless First code unit = \x{120} Subject length lower bound = 1 -/\x{c1}/i,I,ucp +/\x{c1}/iI,ucp Capture group count = 0 Options: caseless ucp First code unit = \xc1 (caseless) @@ -1766,7 +1780,7 @@ Subject length lower bound = 2 /a|\x{c1}/iI,ucp Capture group count = 0 Options: caseless ucp -Starting code units: A a \xc1 \xe1 +Starting code units: A a \xc1 \xe1 Subject length lower bound = 1 \x{e1}xxx 0: \xe1 @@ -1803,8 +1817,141 @@ No match \x{17f} No match +/(.) \1/i,ucp + i I + 0: i I + 1: i + +/(.) \1/i,ucp,turkish_casing +\= Expect no match + i I +No match + +/(.) \1/i,ucp + i I + 0: i I + 1: i + \x{212a} k + 0: \x{212a} k + 1: \x{212a} +\= Expect no match + i \x{0130} +No match + \x{0131} I +No match + +/(.) \1/i,ucp,turkish_casing + \x{212a} k + 0: \x{212a} k + 1: \x{212a} + i \x{0130} + 0: i \x{130} + 1: i + \x{0131} I + 0: \x{131} I + 1: \x{131} +\= Expect no match + i I +No match + +/(.) 
(?r:\1)/i,ucp,turkish_casing + i I + 0: i I + 1: i +\= Expect no match + i \x{0130} +No match + \x{0131} I +No match + \x{212a} k +No match + +/[a-z][^i]I/ucp,turkish_casing + bII + 0: bII + b\x{0130}I + 0: b\x{130}I + b\x{0131}I + 0: b\x{131}I +\= Expect no match + biI +No match + +/[a-z][^i]I/i,ucp,turkish_casing + b\x{0131}I + 0: b\x{131}I + bII + 0: bII +\= Expect no match + biI +No match + b\x{0130}I +No match + +/[a-z](?r:[^i])I/i,ucp,turkish_casing + b\x{0131}I + 0: b\x{131}I + b\x{0130}I + 0: b\x{130}I +\= Expect no match + bII +No match + biI +No match + +/b(?r:[\x{00FF}-\x{FFEE}])/i,ucp,turkish_casing + b\x{0130} + 0: b\x{130} + b\x{0131} + 0: b\x{131} + B\x{212a} + 0: B\x{212a} +\= Expect no match + bi +No match + bI +No match + bk +No match + +/[\x60-\x7f]/i,ucp,turkish_casing + i + 0: i +\= Expect no match + I +No match + +/[\x60-\xc0]/i,ucp,turkish_casing + i + 0: i +\= Expect no match + I +No match + +/[\x80-\xc0]/i,ucp,turkish_casing +\= Expect no match + i +No match + I +No match + # ---------------------------------------------------- +/b[\x{00FF}-\x{FFEE}]/ir + b\x{0130} + 0: b\x{130} + b\x{0131} + 0: b\x{131} + B\x{212a} + 0: B\x{212a} +\= Expect no match + bi +No match + bI +No match + bk +No match + # Quantifier after a literal that has the value of META_ACCEPT (not UTF). This # fails in 16-bit mode, but is OK for 32-bit. 
@@ -1853,6 +2000,31 @@ No match Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z No match +/[sk](?r:[sk])[sk]/Bi,ucp +------------------------------------------------------------------ + Bra + [KSks\x{17f}\x{212a}] + Bra + [KSks] + Ket + [KSks\x{17f}\x{212a}] + Ket + End +------------------------------------------------------------------ + SKS + 0: SKS + sks + 0: sks + \x{212a}S\x{17f} + 0: \x{212a}S\x{17f} + \x{17f}K\x{212a} + 0: \x{17f}K\x{212a} +\= Expect no match + s\x{212a}s +No match + K\x{17f}K +No match + # --------------------------------------------------------- # End of testinput12 diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 index dd1a977..c0c2d90 100644 --- a/testdata/testoutput14-16 +++ b/testdata/testoutput14-16 @@ -31,7 +31,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 XX\x{dfff}\=no_utf_check 0: X XX\x{110000} -** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 +** Failed: character \N{U+110000} is greater than 0x10ffff and therefore cannot be encoded as UTF-16 XX\x{d800}\x{1234} Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 diff --git a/testdata/testoutput15 b/testdata/testoutput15 index 943ab72..e2407a7 100644 --- a/testdata/testoutput15 +++ b/testdata/testoutput15 @@ -21,7 +21,7 @@ Failed: error -63: heap limit exceeded /(a+)*zz/I Capture group count = 1 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits_noheap @@ -111,10 +111,10 @@ Minimum depth limit = 10 3: ee /(*LIMIT_MATCH=12bc)abc/ -Failed: error 160 at offset 17: (*VERB) not recognized or malformed +Failed: error 160 at offset 16: (*VERB) not recognized or malformed /(*LIMIT_MATCH=4294967290)abc/ -Failed: error 160 at offset 24: (*VERB) not recognized or malformed +Failed: error 160 at offset 23: (*VERB) not recognized or malformed 
/(*LIMIT_DEPTH=4294967280)abc/I Capture group count = 0 @@ -139,7 +139,7 @@ Failed: error -53: matching depth limit exceeded /(*LIMIT_MATCH=3000)(a+)*zz/I Capture group count = 1 Match limit = 3000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 \= Expect limit exceeded @@ -152,7 +152,7 @@ Failed: error -47: match limit exceeded /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I Capture group count = 1 Match limit = 3000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 \= Expect limit exceeded @@ -162,7 +162,7 @@ Failed: error -47: match limit exceeded /(*LIMIT_MATCH=60000)(a+)*zz/I Capture group count = 1 Match limit = 60000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 \= Expect no match @@ -175,7 +175,7 @@ Failed: error -47: match limit exceeded /(*LIMIT_DEPTH=10)(a+)*zz/I Capture group count = 1 Depth limit = 10 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 \= Expect limit exceeded @@ -188,7 +188,7 @@ Failed: error -53: matching depth limit exceeded /(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I Capture group count = 1 Depth limit = 1000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 \= Expect no match @@ -198,7 +198,7 @@ No match /(*LIMIT_DEPTH=1000)(a+)*zz/I Capture group count = 1 Depth limit = 1000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 \= Expect no match @@ -453,7 +453,7 @@ Failed: error -52: nested recursion at the same subject position /[^\xff]((?1))/BI ------------------------------------------------------------------ Bra - [^\x{ff}] + [^\x{ff}] (not) CBra 1 Recurse Ket @@ -477,8 +477,9 @@ Failed: error -52: nested recursion at the same subject position 
------------------------------------------------------------------ Capture group count = 0 Options: no_auto_possess -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Optimizations: dotstar_anchor,start_optimize +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 abc\=callout_fail=1 --->abc @@ -501,8 +502,9 @@ No match Capture group count = 0 Compile options: Overall options: no_auto_possess -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Optimizations: dotstar_anchor,start_optimize +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 abc\=callout_fail=1 --->abc diff --git a/testdata/testoutput17 b/testdata/testoutput17 index 00c4bd4..95f3959 100644 --- a/testdata/testoutput17 +++ b/testdata/testoutput17 @@ -6,20 +6,20 @@ # JIT does not support this pattern (callout at start of condition). /(?(?C1)(?=a)a)/I -JIT compilation was not successful (no more memory) +JIT compilation was not successful (feature is not supported by the JIT compiler) Capture group count = 0 May match empty string Subject length lower bound = 0 -JIT compilation was not successful (no more memory) +JIT compilation was not successful (feature is not supported by the JIT compiler) # The following pattern cannot be compiled by JIT. 
/b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b
*/I -JIT compilation was not successful (no more memory) +JIT compilation was not successful (feature is not supported by the JIT compiler) Capture group count = 0 May match empty string Subject length lower bound = 0 -JIT compilation was not successful (no more memory) +JIT compilation was not successful (feature is not supported by the JIT compiler) # Check that an infinite recursion loop is caught. @@ -195,7 +195,7 @@ No match, mark = m (JIT) /(a+)*zz/I Capture group count = 1 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 JIT compilation was successful @@ -298,7 +298,7 @@ Failed: error -47: match limit exceeded /(*LIMIT_MATCH=3000)(a+)*zz/I Capture group count = 1 Match limit = 3000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 JIT compilation was successful @@ -312,7 +312,7 @@ Failed: error -47: match limit exceeded /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I Capture group count = 1 Match limit = 3000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 JIT compilation was successful @@ -323,7 +323,7 @@ Failed: error -47: match limit exceeded /(*LIMIT_MATCH=60000)(a+)*zz/I Capture group count = 1 Match limit = 60000 -Starting code units: a z +Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 JIT compilation was successful @@ -506,7 +506,7 @@ Failed: error -46: JIT stack limit reached /[^\xff]((?1))/BI ------------------------------------------------------------------ Bra - [^\x{ff}] + [^\x{ff}] (not) CBra 1 Recurse Ket diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 8375668..de4752e 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -92,7 +92,7 @@ Subject length lower bound = 3 /a*bc/I Capture group count = 0 -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 2 @@ -172,7 +172,7 @@ 
Subject length lower bound = 1 /cat|dog|elephant/I Capture group count = 0 -Starting code units: c d e +Starting code units: c d e Subject length lower bound = 3 this sentence eventually mentions a cat 0: cat @@ -181,7 +181,7 @@ Subject length lower bound = 3 /cat|dog|elephant/I Capture group count = 0 -Starting code units: c d e +Starting code units: c d e Subject length lower bound = 3 this sentence eventually mentions a cat 0: cat @@ -191,7 +191,7 @@ Subject length lower bound = 3 /cat|dog|elephant/Ii Capture group count = 0 Options: caseless -Starting code units: C D E c d e +Starting code units: C D E c d e Subject length lower bound = 3 this sentence eventually mentions a CAT cat 0: CAT @@ -200,30 +200,30 @@ Subject length lower bound = 3 /a|[bcd]/I Capture group count = 0 -Starting code units: a b c d +Starting code units: a b c d Subject length lower bound = 1 /(a|[^\dZ])/I Capture group count = 1 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y [ \ ] ^ _ ` a b c d - e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 - \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 - \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 - \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 - \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf - \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce - \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd - \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec - \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb - \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y [ \ ] ^ _ ` a b c d + e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 + \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 + \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 + \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 + \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf + \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff Subject length lower bound = 1 /(a|b)*[\s]/I Capture group count = 1 -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b Subject length lower bound = 1 /(ab\2)/ @@ -354,7 +354,7 @@ Failed: error 111 at offset 4: unrecognized character after (? or (?- /^abc|def/I Capture group count = 0 -Starting code units: a d +Starting code units: a d Subject length lower bound = 3 abcdef 0: abc @@ -383,20 +383,20 @@ Failed: error 106 at offset 4: missing terminating ] for character class /[^aeiou ]{3,}/I Capture group count = 0 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 - 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ - \ ] ^ _ ` b c d f g h j k l m n p q r s t v w x y z { | } ~ \x7f \x80 \x81 - \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 - \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f - \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae - \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd - \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc - \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb - \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea - \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 - \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 + 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ + \ ] ^ _ ` b c d f g h j k l m n p q r s t v w x y z { | } ~ \x7f \x80 \x81 + \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 + \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f + \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 3 co-processors, and for 0: -pr @@ -503,7 +503,7 @@ Subject length lower bound = 4 Capture group count = 1 Compile options: Overall options: anchored -Starting code units: D b d +Starting code units: D b d Subject length lower bound = 1 /(?s).*/I @@ -515,17 +515,17 @@ Subject length lower bound = 0 /[abcd]/I Capture group count = 0 -Starting code units: a b c d +Starting code units: a b c d Subject length lower bound = 1 /(?i)[abcd]/I Capture group count = 0 -Starting code units: A B C D a b c d +Starting code units: A B C D a b c d Subject length lower bound = 1 /(?m)[xy]|(b|c)/I Capture group count = 1 -Starting code units: b c x y +Starting code units: b c x y Subject length lower bound = 1 /(^a|^b)/Im @@ -553,13 +553,13 @@ Failed: error 124 at offset 4: missing closing parenthesis for condition Failed: error 124 at offset 4: missing closing parenthesis for condition /(?(?i))/ -Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) /(?(abc))/ Failed: error 115 at offset 3: reference to non-existent subpattern /(?(? 
Overall options: anchored -Starting code units: a b +Starting code units: a b Subject length lower bound = 4 adef\=get=1,get=2,get=3,get=4,getall 0: adef @@ -1313,25 +1313,25 @@ Subject length lower bound = 3 /abc|bac/I Capture group count = 0 -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 3 /(abc|bac)/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 3 /(abc|(c|dc))/I Capture group count = 2 -Starting code units: a c d +Starting code units: a c d Last code unit = 'c' Subject length lower bound = 1 /(abc|(d|de)c)/I Capture group count = 2 -Starting code units: a d +Starting code units: a d Last code unit = 'c' Subject length lower bound = 2 @@ -1347,7 +1347,7 @@ Subject length lower bound = 1 /(baa|a+)/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Last code unit = 'a' Subject length lower bound = 1 @@ -1379,8 +1379,8 @@ Subject length lower bound = 1 /a|ba|\w/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /abc(?=pqr)/I @@ -1440,7 +1440,7 @@ Subject length lower bound = 1 /(a.c*|b.c*)/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /a.c*|aba/I @@ -1766,33 +1766,33 @@ Subject length lower bound = 2 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s 
t u v w x y z Subject length lower bound = 1 /^[[:^alnum:]]/IB ------------------------------------------------------------------ Bra ^ - [\x00-/:-@[-`{-\xff] (neg) + [^0-9A-Za-z] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > - ? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 - \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 - \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 - \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 - \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 - \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 - \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 - \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 - \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 + \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 + \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 + \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 + \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[[:alpha:]]/IB @@ -1806,40 +1806,40 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /^[[:^alpha:]]/IB ------------------------------------------------------------------ Bra ^ - [\x00-@[-`{-\xff] (neg) + [^A-Za-z] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? 
@ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 - \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 - \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 - \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 - \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 - \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf - \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde - \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed - \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc - \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff Subject length lower bound = 1 /[_[:alpha:]]/I Capture group count = 0 -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 
/^[[:ascii:]]/IB @@ -1853,34 +1853,34 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ - \x7f +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ + \x7f Subject length lower bound = 1 /^[[:^ascii:]]/IB ------------------------------------------------------------------ Bra ^ - [\x80-\xff] (neg) + [^\x00-\x7f] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a - \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 - \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 - \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 - \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 - \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 - \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 - \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 - \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a + \x8b \x8c \x8d \x8e \x8f 
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 + \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 + \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 + \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 + \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 + \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 + \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 + \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[[:blank:]]/IB @@ -1894,40 +1894,40 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x09 \x20 +Starting code units: \x09 \x20 Subject length lower bound = 1 /^[[:^blank:]]/IB ------------------------------------------------------------------ Bra ^ - [\x00-\x08\x0a-\x1f!-\xff] (neg) + [^\x09 ] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b - \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a - \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 - : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ - _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 - \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f - \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e - \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad - \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc - \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb - \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda - \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 - \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 - \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[\n\x0b\x0c\x0d[:blank:]]/I Capture group count = 0 Contains explicit CR or LF match -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 Subject length lower bound = 1 /^[[:cntrl:]]/IB @@ -1941,9 +1941,9 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x7f +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x7f Subject length lower bound = 1 /^[[:digit:]]/IB @@ -1957,7 +1957,7 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Subject length lower bound = 1 /^[[:graph:]]/IB @@ -1971,9 +1971,9 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code 
units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : - ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ - ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ +Starting code units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : + ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Subject length lower bound = 1 /^[[:lower:]]/IB @@ -1987,7 +1987,7 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /^[[:print:]]/IB @@ -2001,9 +2001,9 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 - 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] - ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Subject length lower bound = 1 /^[[:punct:]]/IB @@ -2017,8 +2017,8 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ - _ ` { | } ~ +Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? 
@ [ \ ] ^ + _ ` { | } ~ Subject length lower bound = 1 /^[[:space:]]/IB @@ -2032,7 +2032,7 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 Subject length lower bound = 1 /^[[:upper:]]/IB @@ -2046,7 +2046,7 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Subject length lower bound = 1 /^[[:xdigit:]]/IB @@ -2060,7 +2060,7 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f Subject length lower bound = 1 /^[[:word:]]/IB @@ -2074,87 +2074,87 @@ Subject length lower bound = 1 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /^[[:^cntrl:]]/IB ------------------------------------------------------------------ Bra ^ - [ -~\x80-\xff] (neg) + [^\x00-\x1f\x7f] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 - 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] - ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x80 \x81 - \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 - \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f - \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae - \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd - \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc - \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb - \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea - \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 - \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x80 \x81 + \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 + \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f + \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[12[:^digit:]]/IB ------------------------------------------------------------------ Bra ^ - [\x00-/12:-\xff] (neg) + [^03-9] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x00 \x01 
\x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 1 2 : ; < - = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a - b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 - \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 - \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 - \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf - \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe - \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd - \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc - \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb - \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa - \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 1 2 : ; < + = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a + b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /^[[:^blank:]]/IB ------------------------------------------------------------------ Bra ^ - [\x00-\x08\x0a-\x1f!-\xff] (neg) + [^\x09 ] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: Overall options: anchored -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b - \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a - \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 - : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ - _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 - \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f - \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e - \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad - \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc - \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb - \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda - \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 - \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 - \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 /[01[:alpha:]%]/IB @@ -2165,24 +2165,24 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: % 0 1 A B C D E F G H I J K L M N O P Q R S T U V W - X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: % 0 1 A B C D E F G H I J K L M N O P Q R S T U V W + X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /[[.ch.]]/I -Failed: error 113 at offset 1: POSIX collating elements are not supported +Failed: error 113 at offset 7: POSIX collating elements are not supported /[[=ch=]]/I -Failed: error 113 at offset 1: POSIX collating elements are not supported +Failed: error 113 at offset 7: POSIX collating elements are not supported /[[:rhubarb:]]/I -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 12: unknown POSIX class name /[[:upper:]]/Ii Capture group count = 0 Options: caseless -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W 
X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 A 0: A @@ -2192,8 +2192,8 @@ Subject length lower bound = 1 /[[:lower:]]/Ii Capture group count = 0 Options: caseless -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 A 0: A @@ -2203,7 +2203,7 @@ Subject length lower bound = 1 /((?-i)[[:lower:]])[[:lower:]]/Ii Capture group count = 1 Options: caseless -Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 2 ab 0: ab @@ -2228,7 +2228,7 @@ Failed: error 115 at offset 5: reference to non-existent subpattern /(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|
$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\
s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/I Capture group count = 271 Max back reference = 270 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Subject length lower bound = 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC\=ovector=300 0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC @@ -2705,31 +2705,31 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 Subject length lower bound = 1 /[\S]/IB ------------------------------------------------------------------ Bra - [\x00-\x08\x0e-\x1f!-\xff] (neg) + [^\x09-\x0d ] Ket End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f - \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e - \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C - D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h - i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 - \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 - \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 - \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 - \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 - \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf - \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde - \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed - \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc - \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff Subject length lower bound = 1 /a(?i)b/IB @@ -2913,8 +2913,8 @@ Subject length lower bound = 3 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 \= Expect no match xxxxx @@ -2942,6 +2942,47 @@ Capture group count = 0 First code unit = 'x' Subject length lower bound = 1 +# For comparison with the following test, which disables auto-possessification +# In this regex, x+ should be converted to x++ +/x+y/B,auto_possess +------------------------------------------------------------------ + Bra + x++ + y + Ket + End +------------------------------------------------------------------ + +# In this regex, x+ should not be converted to x++ +/x+y/B,auto_possess_off +------------------------------------------------------------------ + Bra + x+ + y + Ket + End +------------------------------------------------------------------ + +# 
Also in this regex, x+ should not be converted to x++ +/x+y/B,optimization_none +------------------------------------------------------------------ + Bra + x+ + y + Ket + End +------------------------------------------------------------------ + +# In this one too, x+ should not be converted to x++ +/x+y/B,no_auto_possess +------------------------------------------------------------------ + Bra + x+ + y + Ket + End +------------------------------------------------------------------ + /x{1,3}+/B,no_auto_possess ------------------------------------------------------------------ Bra @@ -2963,8 +3004,8 @@ Subject length lower bound = 1 /[^x]{1,3}+/B,no_auto_possess ------------------------------------------------------------------ Bra - [^x] - [^x]{0,2}+ + [^x] (not) + [^x]{0,2}+ (not) Ket End ------------------------------------------------------------------ @@ -2972,11 +3013,24 @@ Subject length lower bound = 1 /[^x]{1,3}+/Bi,no_auto_possess ------------------------------------------------------------------ Bra - /i [^x] - /i [^x]{0,2}+ + /i [^x] (not) + /i [^x]{0,2}+ (not) + Ket + End +------------------------------------------------------------------ + +/x{1,3}+/IB,auto_possess_off +------------------------------------------------------------------ + Bra + x + x{0,2}+ Ket End ------------------------------------------------------------------ +Capture group count = 0 +Optimizations: dotstar_anchor,start_optimize +First code unit = 'x' +Subject length lower bound = 1 /(x)*+/IB ------------------------------------------------------------------ @@ -3007,7 +3061,7 @@ No match /(\d++)(\w)/I Capture group count = 2 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Subject length lower bound = 2 12345a 0: 12345a @@ -3045,20 +3099,20 @@ Subject length lower bound = 2 /([^()]++|\([^()]*\))+/I Capture group count = 1 -Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 
\x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( * + , - . / 0 1 2 3 4 5 - 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e - \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d - \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac - \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb - \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca - \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 - \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 - \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 - \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( * + , - . / 0 1 2 3 4 5 + 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff Subject length lower bound = 1 ((abc(ade)ufh()()x 0: abc(ade)ufh()()x @@ -3249,6 +3303,15 @@ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, /\N{25,ab}/ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u +/[\N]/ +Failed: error 171 at offset 3: \N is not supported in a class + +/[\N{4}]/ +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/[\N{name}]/ +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + /a{1,3}b/ungreedy ab 0: ab @@ -3270,7 +3333,7 @@ Failed: error 106 at offset 10: missing terminating ] for character class End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 Subject length lower bound = 1 /[[:space:]]/IB @@ -3281,7 +3344,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 Subject length lower bound = 1 
/[[:space:]abcde]/IB @@ -3292,7 +3355,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b c d e +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b c d e Subject length lower bound = 1 /< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/Ix @@ -3489,12 +3552,12 @@ Subject length lower bound = 1 /[ab]/I Capture group count = 0 -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 /[ab]/I Capture group count = 0 -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 /[^a]/I @@ -3503,13 +3566,13 @@ Subject length lower bound = 1 /\d456/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = '6' Subject length lower bound = 4 /\d456/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = '6' Subject length lower bound = 4 @@ -3535,18 +3598,18 @@ No match /c|abc/I Capture group count = 0 -Starting code units: a c +Starting code units: a c Last code unit = 'c' Subject length lower bound = 1 /(?i)[ab]/I Capture group count = 0 -Starting code units: A B a b +Starting code units: A B a b Subject length lower bound = 1 /[ab](?i)cd/I Capture group count = 0 -Starting code units: a b +Starting code units: a b Last code unit = 'd' (caseless) Subject length lower bound = 3 @@ -3584,7 +3647,7 @@ Subject length lower bound = 6 /(?C1)\dabc(?C2)def/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = 'f' Subject length lower bound = 7 1234abcdef @@ -3601,7 +3664,7 @@ No match /(?C1)\dabc(?C2)def/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = 'f' Subject length lower bound = 7 1234abcdef @@ -3867,12 +3930,12 @@ 
Subject length lower bound = 3 /(?C)a|b/I Capture group count = 0 -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 /a|(b)(?C)/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 b --->b @@ -3984,7 +4047,7 @@ Subject length lower bound = 2 Capture group count = 2 Compile options: Overall options: anchored -Starting code units: ( - 0 1 2 3 4 5 6 7 8 9 +Starting code units: ( - 0 1 2 3 4 5 6 7 8 9 Subject length lower bound = 1 12 0: 12 @@ -4074,7 +4137,7 @@ Subject length lower bound = 9 Capture group count = 1 Compile options: Overall options: anchored -Starting code units: a b c +Starting code units: a b c Subject length lower bound = 2 a=a 0: a=a @@ -4090,7 +4153,7 @@ Subject length lower bound = 2 Capture group count = 2 Compile options: Overall options: anchored -Starting code units: a b c +Starting code units: a b c Subject length lower bound = 2 a=a 0: a=a @@ -4435,8 +4498,8 @@ Subject length lower bound = 8 0: abcdefgh 1: cd 2: gh -Number not found for group 'three' -Copy substring 'three' failed (-49): unknown substring +Number not found for group "three" +Copy substring "three" failed (-49): unknown substring /(?P)(?P)/IB ------------------------------------------------------------------ @@ -4743,7 +4806,7 @@ No match ------------------------------------------------------------------ Capture group count = 0 Options: auto_callout -Starting code units: a b +Starting code units: a b Last code unit = 'b' Subject length lower bound = 1 ab @@ -4786,7 +4849,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: auto_callout -Starting code units: a b +Starting code units: a b Last code unit = 'b' Subject length lower bound = 1 ab @@ -4886,7 +4949,7 @@ No match ------------------------------------------------------------------ Capture group count = 1 Options: auto_callout -Starting code units: a d 
+Starting code units: a d Last code unit = 'x' Subject length lower bound = 4 abcx @@ -4960,7 +5023,7 @@ No match ------------------------------------------------------------------ Capture group count = 1 Options: auto_callout -Starting code units: a d +Starting code units: a d Last code unit = 'x' Subject length lower bound = 4 abcx @@ -5008,7 +5071,7 @@ No match /(ab|cd){3,4}/I,auto_callout Capture group count = 1 Options: auto_callout -Starting code units: a c +Starting code units: a c Subject length lower bound = 6 ababab --->ababab @@ -5098,7 +5161,7 @@ Subject length lower bound = 6 ------------------------------------------------------------------ Capture group count = 1 Options: auto_callout -Starting code units: a b x +Starting code units: a b x Subject length lower bound = 2 \= Expect no match Note: that {,} does NOT introduce a quantifier @@ -5146,7 +5209,7 @@ No match ------------------------------------------------------------------ Capture group count = 1 Options: auto_callout -Starting code units: a b x +Starting code units: a b x Subject length lower bound = 2 \= Expect no match Note: that {,} does NOT introduce a quantifier @@ -5247,7 +5310,7 @@ No match ------------------------------------------------------------------ Capture group count = 1 Options: auto_callout -Starting code units: a b x +Starting code units: a b x Last code unit = '3' Subject length lower bound = 11 aacaacaacaacaac123 @@ -5323,7 +5386,7 @@ No match Capture group count = 3 Compile options: Overall options: anchored -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = '/' Subject length lower bound = 6 13/05/04\=ps @@ -5377,31 +5440,31 @@ No match /0{0,2}ABC/I Capture group count = 0 -Starting code units: 0 A +Starting code units: 0 A Last code unit = 'C' Subject length lower bound = 3 /\d{3,}ABC/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = 'C' 
Subject length lower bound = 6 /\d*ABC/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A +Starting code units: 0 1 2 3 4 5 6 7 8 9 A Last code unit = 'C' Subject length lower bound = 3 /[abc]+DE/I Capture group count = 0 -Starting code units: a b c +Starting code units: a b c Last code unit = 'E' Subject length lower bound = 3 /[abc]?123/I Capture group count = 0 -Starting code units: 1 a b c +Starting code units: 1 a b c Last code unit = '3' Subject length lower bound = 3 123\=ps @@ -5421,7 +5484,7 @@ Partial match: c12 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = 'X' Subject length lower bound = 4 1\=ps @@ -5506,13 +5569,13 @@ Subject length lower bound = 4 No match /a{11111111111111111111}/I -Failed: error 105 at offset 8: number too big in {} quantifier +Failed: error 105 at offset 22: number too big in {} quantifier /(){64294967295}/I -Failed: error 105 at offset 9: number too big in {} quantifier +Failed: error 105 at offset 14: number too big in {} quantifier /(){2,4294967295}/I -Failed: error 105 at offset 11: number too big in {} quantifier +Failed: error 105 at offset 15: number too big in {} quantifier "(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"I Capture group count = 1 @@ -5812,8 +5875,8 @@ Subject length lower bound = 2 0: a1 1: a1 2: a1 -Number not found for group 'Z' -Copy substring 'Z' failed (-49): unknown substring +Number not found for group "Z" +Copy substring "Z" failed (-49): unknown substring C a1 (2) A (non-unique) /(?|(?)(?)(?)|(?)(?)(?))/I,dupnames @@ -5847,7 +5910,7 @@ Named capture groups: A 1 A 2 Options: dupnames -Starting code units: a c +Starting code units: a c Subject length lower bound = 2 ab\=copy=A 0: ab @@ -5856,7 +5919,7 @@ Subject length lower bound = 2 C a (1) A (non-unique) cd\=copy=A 0: cd -Copy substring 'A' failed (-55): requested value is not set 
+Copy substring "A" failed (-55): requested value is not set /^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames Capture group count = 4 @@ -5866,7 +5929,7 @@ Named capture groups: A 3 A 4 Options: dupnames -Starting code units: a c +Starting code units: a c Subject length lower bound = 2 cdefgh\=copy=A 0: cdefgh @@ -5900,8 +5963,8 @@ Subject length lower bound = 2 0: a1 1: a1 2: a1 -Number not found for group 'Z' -Get substring 'Z' failed (-49): unknown substring +Number not found for group "Z" +Get substring "Z" failed (-49): unknown substring G a1 (2) A (non-unique) /^(?Pa)(?Pb)/I,dupnames @@ -5925,7 +5988,7 @@ Named capture groups: A 1 A 2 Options: dupnames -Starting code units: a c +Starting code units: a c Subject length lower bound = 2 ab\=get=A 0: ab @@ -5934,7 +5997,7 @@ Subject length lower bound = 2 G a (1) A (non-unique) cd\=get=A 0: cd -Get substring 'A' failed (-55): requested value is not set +Get substring "A" failed (-55): requested value is not set /^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames Capture group count = 4 @@ -5944,7 +6007,7 @@ Named capture groups: A 3 A 4 Options: dupnames -Starting code units: a c +Starting code units: a c Subject length lower bound = 2 cdefgh\=get=A 0: cdefgh @@ -6098,7 +6161,7 @@ Subject length lower bound = 2 /\s*,\s*/I Capture group count = 0 -Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 , +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 , Last code unit = ',' Subject length lower bound = 1 \x0b,\x0b @@ -6160,7 +6223,7 @@ No match No match /^abc/Im,newline=bad -** Invalid value in 'newline=bad' +** Invalid value in "newline=bad" /.*/I,newline=lf Capture group count = 0 @@ -6204,8 +6267,8 @@ Subject length lower bound = 0 /\w+(.)(.)?def/Is Capture group count = 2 Options: dotall -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a 
b c d e f g h i j k l m n o p q r s t u v w x y z Last code unit = 'f' Subject length lower bound = 5 abc\ndef @@ -6223,7 +6286,7 @@ Subject length lower bound = 5 Capture group count = 1 Named capture groups: B 1 -Starting code units: 0 1 2 3 4 5 6 7 8 9 +Starting code units: 0 1 2 3 4 5 6 7 8 9 Last code unit = '.' Subject length lower bound = 7 @@ -6343,42 +6406,42 @@ Subject length lower bound = 2 /(a*b|(?i:c*(?-i)d))/I Capture group count = 1 -Starting code units: C a b c d +Starting code units: C a b c d Subject length lower bound = 1 /()[ab]xyz/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Last code unit = 'z' Subject length lower bound = 4 /(|)[ab]xyz/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Last code unit = 'z' Subject length lower bound = 4 /(|c)[ab]xyz/I Capture group count = 1 -Starting code units: a b c +Starting code units: a b c Last code unit = 'z' Subject length lower bound = 4 /(|c?)[ab]xyz/I Capture group count = 1 -Starting code units: a b c +Starting code units: a b c Last code unit = 'z' Subject length lower bound = 4 /(d?|c?)[ab]xyz/I Capture group count = 1 -Starting code units: a b c d +Starting code units: a b c d Last code unit = 'z' Subject length lower bound = 4 /(d?|c)[ab]xyz/I Capture group count = 1 -Starting code units: a b c d +Starting code units: a b c d Last code unit = 'z' Subject length lower bound = 4 @@ -6395,7 +6458,7 @@ Subject length lower bound = 4 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: a b +Starting code units: a b Last code unit = 'b' Subject length lower bound = 2 @@ -6412,7 +6475,7 @@ Subject length lower bound = 2 Capture group count = 0 Compile options: Overall options: anchored -Starting code units: a b +Starting code units: a b Last code unit = 'b' Subject length lower bound = 2 @@ -6429,7 +6492,7 @@ Subject length lower bound = 2 Capture group count = 0 Compile options: Overall options: anchored 
-Starting code units: a b +Starting code units: a b Last code unit = 'b' Subject length lower bound = 2 @@ -6468,8 +6531,7 @@ No match Capture group count = 0 Compile options: caseless Overall options: anchored caseless -Starting code units: A a -Last code unit = 'A' (caseless) +First code unit = 'A' (caseless) Subject length lower bound = 2 aaaA5 0: aaaA5 @@ -6480,22 +6542,22 @@ Subject length lower bound = 2 /(a*|b*)[cd]/I Capture group count = 1 -Starting code units: a b c d +Starting code units: a b c d Subject length lower bound = 1 /(a+|b*)[cd]/I Capture group count = 1 -Starting code units: a b c d +Starting code units: a b c d Subject length lower bound = 1 /(a*|b+)[cd]/I Capture group count = 1 -Starting code units: a b c d +Starting code units: a b c d Subject length lower bound = 1 /(a+|b+)[cd]/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( @@ -7071,7 +7133,7 @@ Matched, but too many substrings /[^a]+a/B ------------------------------------------------------------------ Bra - [^a]++ + [^a]++ (not) a Ket End @@ -7080,7 +7142,7 @@ Matched, but too many substrings /[^a]+a/Bi ------------------------------------------------------------------ Bra - /i [^a]++ + /i [^a]++ (not) /i a Ket End @@ -7089,7 +7151,7 @@ Matched, but too many substrings /[^a]+A/Bi ------------------------------------------------------------------ Bra - /i [^a]++ + /i [^a]++ (not) /i A Ket End @@ -7098,7 +7160,7 @@ Matched, but too many substrings /[^a]+b/B ------------------------------------------------------------------ Bra - [^a]+ + [^a]+ (not) b Ket End @@ -7107,7 +7169,7 @@ Matched, but too many substrings /[^a]+\d/B ------------------------------------------------------------------ Bra - [^a]+ + [^a]+ (not) \d Ket End @@ -7117,7 +7179,7 @@ Matched, but too many substrings 
------------------------------------------------------------------ Bra a*+ - [^a] + [^a] (not) Ket End ------------------------------------------------------------------ @@ -7296,7 +7358,7 @@ No match ^ CBra 1 Cond - 2 Cond ref + 2 Capture ref y Ket [()] @@ -7389,8 +7451,8 @@ No match /(?=(\w+))\1:/I Capture group count = 1 Max back reference = 1 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Last code unit = ':' Subject length lower bound = 2 abcd: @@ -7402,8 +7464,8 @@ Capture group count = 1 Max back reference = 1 Named capture groups: abc 1 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Last code unit = ':' Subject length lower bound = 2 abcd: @@ -7820,7 +7882,7 @@ Failed: error 115 at offset 5: reference to non-existent subpattern abc Ket Cond - 1 Cond ref + 1 Capture ref X Alt Y @@ -7843,7 +7905,7 @@ No match ^ CBra 1 Cond - 2 Cond ref + 2 Capture ref X Alt Y @@ -7875,7 +7937,7 @@ Failed: error 115 at offset 5: reference to non-existent subpattern Bra CBra 1 Cond - 1 Cond ref + 1 Capture ref a Ket Ket @@ -7891,7 +7953,7 @@ Failed: error 115 at offset 6: reference to non-existent subpattern Bra ^ Cond - 1 Cond ref + 1 Capture ref X Alt Y @@ -8375,7 +8437,7 @@ No match Failed: error 166 at offset 7: (*MARK) must have an argument /\g6666666666/ -Failed: error 161 at offset 7: subpattern number is too big +Failed: error 161 at offset 12: subpattern number is too big /[\g6666666666]/B ------------------------------------------------------------------ @@ -8713,31 
+8775,31 @@ Failed: error 162 at offset 4: subpattern name expected Failed: error 162 at offset 4: subpattern name expected /[[:foo:]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 8: unknown POSIX class name /[[:1234:]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 9: unknown POSIX class name /[[:f\oo:]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 9: unknown POSIX class name /[[: :]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 6: unknown POSIX class name /[[:...:]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 8: unknown POSIX class name /[[:l\ower:]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 11: unknown POSIX class name /[[:abc\:]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 9: unknown POSIX class name /[abc[:x\]pqr:]]/ -Failed: error 130 at offset 6: unknown POSIX class name +Failed: error 130 at offset 14: unknown POSIX class name /[[:a\dz:]]/ -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 9: unknown POSIX class name /(^(a|b\g<-1'c))/ Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number @@ -8756,6 +8818,17 @@ No match cat No match +/cat[]/B,allow_empty_class +------------------------------------------------------------------ + Bra + cat + [] + Ket + End +------------------------------------------------------------------ + cat\=ph +Partial match: cat + /(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames cat 0: a @@ -8826,9 +8899,8 @@ No match No match /a[]*+b/allow_empty_class,match_unset_backref,dupnames -\= Expect no match ab -No match + 0: ab /a[^]b/allow_empty_class,match_unset_backref,dupnames aXb @@ -8876,7 +8948,7 @@ Failed: error 109 at offset 8: 
quantifier does not follow a repeatable item /(abc|pqr|123){0}[xyz]/I Capture group count = 1 -Starting code units: x y z +Starting code units: x y z Subject length lower bound = 1 /(?(?=.*b)b|^)/I,auto_callout @@ -9372,10 +9444,10 @@ Partial match: ab Cond false CBra 1 < - [^m] - [^>] + [^m] (not) + [^>] (not) > - [^<] + [^<] (not) Ket CBra 2 \w*+ @@ -9394,10 +9466,10 @@ Partial match: ab Cond false CBra 1 < - [\x00-/:-\xff] (neg) - [^>] + [^0-9] + [^>] (not) > - [^<] + [^<] (not) Ket CBra 2 \w*+ @@ -9660,42 +9732,42 @@ Failed: error 125 at offset 0: length of lookbehind assertion is not limited /(a|bc)\1/I Capture group count = 1 Max back reference = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /(a|bc)\1{2,3}/I Capture group count = 1 Max back reference = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 3 /(a|bc)(?1)/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /(a|b\1)(a|b\1)/I Capture group count = 2 Max back reference = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /(a|b\1){2}/I Capture group count = 1 Max back reference = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /(a|bbbb\1)(a|bbbb\1)/I Capture group count = 2 Max back reference = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /(a|bbbb\1){2}/I Capture group count = 1 Max back reference = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 2 /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/I @@ -9727,12 +9799,12 @@ Subject length lower bound = 8 /(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/Ii Capture group count = 1 Options: caseless -Starting code units: A B C a b c +Starting code units: A B C a b c Subject length lower bound = 1 
/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/I Capture group count = 0 -Starting code units: c d +Starting code units: c d Last code unit = 'b' Subject length lower bound = 41 @@ -9768,7 +9840,7 @@ Subject length lower bound = 2 Capture group count = 1 Named capture groups: a 1 -Starting code units: A B +Starting code units: A B Subject length lower bound = 1 AB\=copy=a 0: A @@ -9795,7 +9867,7 @@ Named capture groups: realquote 3 realquote 6 Options: dupnames extended -Starting code units: a b +Starting code units: a b Subject length lower bound = 3 a"aaaaa 0: a"aaaaa @@ -9816,6 +9888,43 @@ No match a"11111 No match +/(?:a(?[0-5])|b(?[4-7]))c(?()d|e)/B,dupnames +------------------------------------------------------------------ + Bra + Bra + a + CBra 1 + [0-5] + Ket + Alt + b + CBra 2 + [4-7] + Ket + Ket + c + Cond + Capture ref 2 + d + Alt + e + Ket + Ket + End +------------------------------------------------------------------ + a4cd + 0: a4cd + 1: 4 + b4cd + 0: b4cd + 1: + 2: 4 +\= Expect no match + a6cd +No match + a6ce +No match + /^(?|(a)(b)(c)(?d)|(?e)) (?('D')X|Y)/IBx,dupnames ------------------------------------------------------------------ Bra @@ -9839,7 +9948,7 @@ No match Ket Ket Cond - Cond ref 2 + Capture ref 2 X Alt Y @@ -9854,7 +9963,7 @@ Named capture groups: D 1 Compile options: dupnames extended Overall options: anchored dupnames extended -Starting code units: a e +Starting code units: a e Subject length lower bound = 2 abcdX 0: abcdX @@ -10437,7 +10546,7 @@ No match abc\=offset=4 Failed: error -33: bad offset value abc\=offset=-4 -** Invalid value in 'offset=-4' +** Invalid value in "offset=-4" \= Valid data abc\=offset=0 0: abc @@ -10595,7 +10704,7 @@ Subject length lower bound = 5 /([abc])++1234/I Capture group count = 1 -Starting code units: a b c +Starting code units: a b c Last code unit = '4' Subject length lower bound = 5 @@ -10747,9 +10856,8 @@ Subject length lower bound = 1 /(?=a{3})[bcd]/Ii 
Capture group count = 0 Options: caseless -First code unit = 'a' (caseless) -Last code unit = 'a' (caseless) -Subject length lower bound = 2 +First code unit = 'A' (caseless) +Subject length lower bound = 1 /(abc)\1+/ @@ -10897,12 +11005,12 @@ Subject length lower bound = 0 /(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I Capture group count = 2 -Starting code units: a b +Starting code units: a b Subject length lower bound = 3 /(a(?2)|b)(b(?1)|a)(?1)(?2)/I Capture group count = 2 -Starting code units: a b +Starting code units: a b Subject length lower bound = 4 /(abc)(?1)/I @@ -11200,7 +11308,7 @@ Subject length lower bound = 18 /\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|abc)(?z)\k()/IB,dupnames @@ -12974,11 +13082,11 @@ Subject length lower bound = 5 [a-d]{5,12}+ [0-9e-z]*+ # - [\x00-`{-\xff] (neg)++ + [^a-z]++ [b-y]*+ a [2-7]?+ - [\x00-/:-`{-\xff] (neg)++ + [^0-9a-z]++ Ket End ------------------------------------------------------------------ @@ -13132,7 +13240,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: a b c d +Starting code units: a b c d Last code unit = 'd' Subject length lower bound = 1 @@ -13145,7 +13253,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: a b c +Starting code units: a b c Last code unit = 'd' Subject length lower bound = 2 @@ -13158,7 +13266,7 @@ Subject length lower bound = 2 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: a b c d +Starting code units: a b c d Last code unit = 'd' Subject length lower bound = 1 @@ -13171,7 +13279,7 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: a b c +Starting code units: a b c Last code unit = 'd' Subject length lower bound = 
5 @@ -13184,7 +13292,7 @@ Subject length lower bound = 5 End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: a b c d +Starting code units: a b c d Last code unit = 'd' Subject length lower bound = 1 @@ -13204,7 +13312,7 @@ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing? Failed: error 167 at offset 7: non-hex character in \x{} (closing brace missing?) /^A\x{/ -Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} +Failed: error 178 at offset 5: digits missing after \x or in \x{} or \o{} or \N{U+} /[ab]++/B,no_auto_possess ------------------------------------------------------------------ @@ -13217,7 +13325,7 @@ Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} /[^ab]*+/B,no_auto_possess ------------------------------------------------------------------ Bra - [\x00-`c-\xff] (neg)*+ + [^ab]*+ Ket End ------------------------------------------------------------------ @@ -13239,16 +13347,16 @@ Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} ------------------------------------------------------------------ /[a-[:digit:]]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 12: invalid range in character class /[A-[:digit:]]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 12: invalid range in character class /[a-[.xxx.]]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 10: invalid range in character class /[a-[=xxx=]]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 10: invalid range in character class /[a-[!xxx!]]+/ Failed: error 108 at offset 3: range out of order in character class @@ -13368,8 +13476,31 @@ No match Alfred Winifred No match +/[[:<:]]+red/B +------------------------------------------------------------------ + Bra 
+ \b + Assert + \w + Ket + Brazero + Assert + \w + Ket + red + Ket + End +------------------------------------------------------------------ + little red riding hood + 0: red + red is a colour + 0: red +\= Expect no match + Alfred +No match + /[a[:<:]] should give error/ -Failed: error 130 at offset 4: unknown POSIX class name +Failed: error 130 at offset 7: unknown POSIX class name /(?=ab\K)/aftertext,allow_lookaround_bsk abcd\=startchar @@ -13416,15 +13547,22 @@ Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) Failed: error 155 at offset 2: missing opening brace after \o /\o{}/ -Failed: error 178 at offset 3: digits missing in \x{} or \o{} or \N{U+} +Failed: error 178 at offset 3: digits missing after \x or in \x{} or \o{} or \N{U+} /\o{whatever}/ Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missing?) /\xthing/ +Failed: error 178 at offset 2: digits missing after \x or in \x{} or \o{} or \N{U+} + +/^A\xZ/ +Failed: error 178 at offset 4: digits missing after \x or in \x{} or \o{} or \N{U+} + +/^A\x/ +Failed: error 178 at offset 4: digits missing after \x or in \x{} or \o{} or \N{U+} /\x{}/ -Failed: error 178 at offset 3: digits missing in \x{} or \o{} or \N{U+} +Failed: error 178 at offset 3: digits missing after \x or in \x{} or \o{} or \N{U+} /\x{whatever}/ Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?) 
@@ -13532,7 +13670,7 @@ Capture group count = 1 Max back reference = 1 Named capture groups: VERSION 1 -Starting code units: a x +Starting code units: a x Subject length lower bound = 5 abcyes 0: abcyes @@ -13557,19 +13695,30 @@ Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION con /(?(VERSION=10.101)yes|no)/ Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition +# We should see the starting code unit, required code unit, and minimum length set for this regex: /abcd/I Capture group count = 0 First code unit = 'a' Last code unit = 'd' Subject length lower bound = 4 +# None of the following three should have the starting code unit, required code unit, and minimum length set: /abcd/I,no_start_optimize Capture group count = 0 Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor + +/abcd/I,start_optimize_off +Capture group count = 0 +Optimizations: auto_possess,dotstar_anchor + +/abcd/I,optimization_none +Capture group count = 0 +Optimizations: /(|ab)*?d/I Capture group count = 1 -Starting code units: a d +Starting code units: a d Last code unit = 'd' Subject length lower bound = 1 abd @@ -13581,6 +13730,7 @@ Subject length lower bound = 1 /(|ab)*?d/I,no_start_optimize Capture group count = 1 Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor abd 0: abd 1: ab @@ -13654,7 +13804,7 @@ Failed: error -49 at offset 36 in replacement: unknown substring /abc/replace=a${A23456789012345678901234567890123}z 123abc -Failed: error -35 at offset 35 in replacement: invalid replacement string +Failed: error -49 at offset 37 in replacement: unknown substring /abc/replace=a${bcd 123abc @@ -13752,7 +13902,66 @@ Failed: error -48: no more memory: 23 code units are needed apple lemon blackberry 3: pear orange strawberry +/"(*:fruit" 00 "juice)apple"/hex,g,replace=${*MARK} + apple lemon blackberry + 1: fruit\x00juice lemon blackberry + +/abc/ + 123abc123\=replace=XYZ + 1: 123XYZ123 + 
123abc123\=replace=[10]XYZ + 1: 123XYZ123 +\= Expect error + 123abc123\=replace=[9]XYZ +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]XYZ +Failed: error -48: no more memory: 10 code units are needed + /abc/ + 123abc123\=replace=XY + 1: 123XY123 + 123abc123\=replace=[9]XY + 1: 123XY123 + 123abc123\=replace=[9]XY,substitute_literal + 1: 123XY123 +\= Expect error + 123abc123\=replace=[8]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[8]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[6]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[6]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[5]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[5]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[4]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[4]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[3]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[3]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 
123abc123\=replace=[2]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[2]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + +/abc/substitute_literal + 123abc123\=replace=XYZ + 1: 123XYZ123 + 123abc123\=replace=[10]XYZ + 1: 123XYZ123 +\= Expect error 123abc123\=replace=[9]XYZ Failed: error -48: no more memory 123abc123\=substitute_overflow_length,replace=[9]XYZ @@ -13776,6 +13985,134 @@ Failed: error -48: no more memory: 10 code units are needed 123abc123\=substitute_overflow_length,replace=[0]x$1z Failed: error -48: no more memory: 10 code units are needed +/a(b)c/substitute_extended + ZabcZ\=replace=>\1< + 1: Z>b\2< +Failed: error -49 at offset 3 in replacement: unknown substring + ZabcZ\=replace=>\8< +Failed: error -49 at offset 3 in replacement: unknown substring + ZabcZ\=replace=>${1}< + 1: Z>b${ 1 }< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${2}< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>${8}< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>$<1>< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>$< 1 >< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$<2>< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>$<8>< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>\g<-1>< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<0>< + 1: Z>abc\g<1>< + 1: Z>b\g< 1 >< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<2>< +Failed: error -49 at offset 6 in replacement: unknown substring + ZabcZ\=replace=>\g<8>< +Failed: error -49 at offset 6 in replacement: unknown 
substring + +/(*:pear)apple/substitute_extended + ZappleZ\=replace=>${*MARK}< + 1: Z>pear$<*MARK>< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZappleZ\=replace=>\g<*MARK>< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/a(?b)c/substitute_extended + ZabcZ\=replace=>${named}< + 1: Z>b${noexist}< +Failed: error -49 at offset 11 in replacement: unknown substring + ZabcZ\=replace=>${}< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${ }< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${ named }< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$< + 1: Z>b$< +Failed: error -49 at offset 11 in replacement: unknown substring + ZabcZ\=replace=>$<>< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$< >< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$< named >< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>\g< + 1: Z>b\g< +Failed: error -49 at offset 12 in replacement: unknown substring + ZabcZ\=replace=>\g<>< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g< >< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g< named >< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/a(b)c/substitute_extended + ZabcZ\=replace=>${1:+ yes : no } + 1: Z> yes Z + ZabcZ\=replace=>${1:+ \o{100} : \o{100} } + 1: Z> @ Z + ZabcZ\=replace=>${1:+ \o{Z} : no } +Failed: error -57 at offset 9 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ yes : \o{Z} } +Failed: error -57 at offset 15 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ \g<1> 
: no } + 1: Z> b Z + ZabcZ\=replace=>${1:+ yes : \g<1> } + 1: Z> yes Z + ZabcZ\=replace=>${1:+ \g<1 : no } +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ yes : \g<1 } +Failed: error -57 at offset 14 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ $<1> : no } +Failed: error -49 at offset 11 in replacement: unknown substring + ZabcZ\=replace=>${1:+ yes : $<1> } + 1: Z> yes Z + ZabcZ\=replace=>${1:+ $<1 : no } +Failed: error -35 at offset 10 in replacement: invalid replacement string + ZabcZ\=replace=>${1:+ yes : $<1 } + 1: Z> yes Z + +/a(b)c/substitute_extended + ZabcZ\=replace=>${ +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${1 +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + ZabcZ\=replace=>${1Z +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + ZabcZ\=replace=>${1; +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + ZabcZ\=replace=>$< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$<1 +Failed: error -35 at offset 4 in replacement: invalid replacement string + ZabcZ\=replace=>$<1Z +Failed: error -35 at offset 5 in replacement: invalid replacement string + ZabcZ\=replace=>$<1; +Failed: error -35 at offset 4 in replacement: invalid replacement string + ZabcZ\=replace=>\g< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<1 +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<1Z +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<1; +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + "((?=(?(?=(?(?=(?(?=()))))))))" a 
0: @@ -13817,9 +14154,9 @@ Start of matched string is beyond its end - displaying from end to start. a\=ovector=1,copy=A,get=A,get=2 Matched, but too many substrings 0: a -Copy substring 'A' failed (-54): requested value is not available +Copy substring "A" failed (-54): requested value is not available Get substring 2 failed (-54): requested value is not available -Get substring 'A' failed (-54): requested value is not available +Get substring "A" failed (-54): requested value is not available a\=ovector=2,copy=A,get=A,get=2 0: a 1: a @@ -13830,9 +14167,9 @@ Get substring 2 failed (-54): requested value is not available Matched, but too many substrings 0: b 1: -Copy substring 'A' failed (-55): requested value is not set +Copy substring "A" failed (-55): requested value is not set Get substring 2 failed (-54): requested value is not available -Get substring 'A' failed (-55): requested value is not set +Get substring "A" failed (-55): requested value is not set /a(b)c(d)/ abc\=ph,copy=0,copy=1,getall @@ -13852,9 +14189,81 @@ Subject length lower bound = 3 Capture group count = 0 Compile options: no_dotstar_anchor Overall options: anchored no_dotstar_anchor +Optimizations: auto_possess,start_optimize First code unit = 'a' Subject length lower bound = 3 +/^abc/info,dotstar_anchor_off +Capture group count = 0 +Compile options: +Overall options: anchored +Optimizations: auto_possess,start_optimize +First code unit = 'a' +Subject length lower bound = 3 + +# For comparison with the following tests, which disable automatic dotstar anchoring +/.*abc/BI +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + +/.*abc/BI,dotstar_anchor_off +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End 
+------------------------------------------------------------------ +Capture group count = 0 +Optimizations: auto_possess,start_optimize +Last code unit = 'c' +Subject length lower bound = 3 + +/.*abc/BI,start_optimize_off +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Optimizations: auto_possess,dotstar_anchor + +/.*abc/BI,optimization_none +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Optimizations: + +/.*abc/BI,no_dotstar_anchor +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_dotstar_anchor +Optimizations: auto_possess,start_optimize +Last code unit = 'c' +Subject length lower bound = 3 + /.*\d/info,auto_callout Capture group count = 0 Options: auto_callout @@ -13873,6 +14282,7 @@ No match /.*\d/info,no_dotstar_anchor,auto_callout Capture group count = 0 Options: auto_callout no_dotstar_anchor +Optimizations: auto_possess,start_optimize Subject length lower bound = 1 \= Expect no match aaa @@ -13900,12 +14310,14 @@ Subject length lower bound = 1 /.*\d/dotall,no_dotstar_anchor,info Capture group count = 0 Options: dotall no_dotstar_anchor +Optimizations: auto_possess,start_optimize Subject length lower bound = 1 /(*NO_DOTSTAR_ANCHOR)(?s).*\d/info Capture group count = 0 Compile options: Overall options: no_dotstar_anchor +Optimizations: auto_possess,start_optimize Subject length lower bound = 1 '^(?:(a)|b)(?(1)A|B)' @@ -14242,7 +14654,7 @@ Failed: error -52: nested recursion at the same subject position # Perl fails to diagnose the absence of an assertion "(?(?.*!.*)?)" -Failed: error 128 at offset 2: assertion expected 
after (?( or (?(?C) +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) "X((?2)()*+){2}+"B ------------------------------------------------------------------ @@ -14401,7 +14813,7 @@ Named capture groups: a 2 b 3 Options: dupnames -Starting code units: 2 3 +Starting code units: 2 3 Subject length lower bound = 1 A23B 0: 2 @@ -14479,7 +14891,7 @@ Failed: error 162 at offset 4: subpattern name expected "(?J:(?|(?'R')(\k'R')|((?'R'))))" /(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ -Failed: error 161 at offset 17: subpattern number is too big +Failed: error 161 at offset 32: subpattern number is too big /^(?:(?(1)x|)+)+$()/B ------------------------------------------------------------------ @@ -14487,7 +14899,7 @@ Failed: error 161 at offset 17: subpattern number is too big ^ SBra SCond - 1 Cond ref + 1 Capture ref x Alt KetRmax @@ -14543,7 +14955,7 @@ Failed: error 162 at offset 9: subpattern name expected Capture group count = 1 Named capture groups: abc 1 -Starting code units: : [ \ +Starting code units: : [ \ Subject length lower bound = 2 "[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" @@ -14594,17 +15006,17 @@ Subject length lower bound = 65535 /(?|(aaa)|(b))\g{1}/I Capture group count = 1 Max back reference = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 /(?|(aaa)|(b))(?1)/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 4 /(?|(aaa)|(b))/I Capture group count = 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 /(?|(?'a'aaa)|(?'a'b))\k'a'/I @@ -14612,7 +15024,7 @@ Capture group count = 1 Max back 
reference = 1 Named capture groups: a 1 -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 /(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/I,dupnames @@ -14622,7 +15034,7 @@ Named capture groups: a 1 a 2 Options: dupnames -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 5 @@ -14670,7 +15082,7 @@ No match 0: ab /(?(8000000000/ -Failed: error 161 at offset 8: subpattern number is too big +Failed: error 161 at offset 13: subpattern number is too big /((?(R8000000000)))/ Failed: error 161 at offset 9: subpattern number is too big @@ -14797,7 +15209,7 @@ No match /abcd/null_context abcd\=null_context 0: abcd -\= Expect error - not allowed together +\= Expect not allowed together abcd\=null_context,find_limits ** Not allowed together: find_limits null_context abcd\=allusedtext,startchar @@ -14819,6 +15231,10 @@ No match abcDE 1: aBcBCbcdEdeabAByzDone +/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended + Hello between wORLD + 2: >hELLO< between >World< + /abcd/replace=xy\kz,substitute_extended abcd Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string @@ -14899,6 +15315,56 @@ Failed: error -55 at offset 3 in replacement: requested value is not set >abcd1234abcd5678<\=replace=wxyz,substitute_matched 2: >wxyz1234wxyz5678< +/abc/substitute_extended,replace=>\045< + abc + 1: >%< + +/abc/substitute_extended,replace=>\45< + abc + 1: >%< + +/abc/substitute_extended,replace=>\o{45}< + abc + 1: >%< + +/abc/substitute_extended,replace=>\845< + abc +Failed: error -49 at offset 5 in replacement: unknown substring + +/a(b)(c)/substitute_extended,replace=>\1< + abc + 1: >b< + +/a(b)(c)/substitute_extended,replace=>\2< + abc + 1: >c< + +/a(b)(c)/substitute_extended,replace=>\3< + abc +Failed: error -49 at offset 3 in replacement: unknown substring + +/a(?b)c/substitute_extended + abc\=replace=>${namED_1}< + 1: >b< + +/a(?b)c/substitute_extended + 
abc\=replace=>${namedverylongbutperfectlylegalsoyoushouldnthaveaproblem_1}< + 1: >b< + +/abc/substitute_extended + abc\=replace=\a\b\e\f\n\r\t\v\\ + 1: \x07\x08\x1b\x0c\x0a\x0d\x09\x0b\ + +/a(b)c/ + LabcR\=replace=>$&< + 1: L>abc$`< + 1: L>L$'< + 1: L>R$_< + 1: L>LabcR@-BD-xz-\xff] (neg) + [^!'-*;?Cy] {1; CBra 1 \x08 @@ -15337,19 +15803,19 @@ Failed: error -55 at offset 3 in replacement: requested value is not set /()()()/use_offset_limit \=ovector=11000000000 -** Invalid value in 'ovector=11000000000' +** Invalid value in "ovector=11000000000" \=callout_fail=11000000000 -** Invalid value in 'callout_fail=11000000000' +** Invalid value in "callout_fail=11000000000" \=callout_fail=1:11000000000 -** Invalid value in 'callout_fail=1:11000000000' +** Invalid value in "callout_fail=1:11000000000" \=callout_data=11000000000 -** Invalid value in 'callout_data=11000000000' +** Invalid value in "callout_data=11000000000" \=callout_data=-11000000000 -** Invalid value in 'callout_data=-11000000000' +** Invalid value in "callout_data=-11000000000" \=offset_limit=1100000000000000000000 -** Invalid value in 'offset_limit=1100000000000000000000' +** Invalid value in "offset_limit=1100000000000000000000" \=copy=11000000000 -** Invalid value in 'copy=11000000000' +** Invalid value in "copy=11000000000" /(*MARK:A\x00b)/mark abc @@ -15491,11 +15957,11 @@ Failed: error 125 at offset 13: length of lookbehind assertion is not limited # Perl accepts these, but gives a warning. We can't warn, so give an error. 
/[a-[:digit:]]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 12: invalid range in character class a-a9-a /[A-[:digit:]]+/ -Failed: error 150 at offset 4: invalid range in character class +Failed: error 150 at offset 12: invalid range in character class A-A9-A /[a-\d]+/ @@ -15523,7 +15989,7 @@ Failed: error 150 at offset 5: invalid range in character class abc Ket Cond - 1 Cond ref + 1 Capture ref xyz Ket Ket @@ -15554,15 +16020,15 @@ Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, // \=get=i00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -** Group name in 'get' is too long +** Group name in "get" is too long \=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 -** Too many characters in named 'get' modifiers +** Too many characters in named "get" modifiers "(?(?C))" -Failed: error 128 at offset 6: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 6: atomic assertion expected after (?( or (?(?C) /(?(?(?(?(?(?))))))/ -Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) /(?<=(?1))((?s))/anchored @@ -15628,14 +16094,14 @@ Subject length lower bound = 1 # behave unpredictably at match time. 
/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ -Failed: error 128 at offset 63: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 63: atomic assertion expected after (?( or (?(?C) .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X /[:[:alnum:]-[[a:lnum:]+/ -Failed: error 150 at offset 11: invalid range in character class +Failed: error 150 at offset 12: invalid range in character class /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ -Failed: error 128 at offset 11: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 11: atomic assertion expected after (?( or (?(?C) /((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ @@ -15681,7 +16147,7 @@ Subject length lower bound = 11 // \=ovector=7777777777 -** Invalid value in 'ovector=7777777777' +** Invalid value in "ovector=7777777777" # This is here because Perl matches, even though a COMMIT is encountered # outside of the recursion. @@ -15873,6 +16339,18 @@ First code unit = 'x' Last code unit = 'x' Subject length lower bound = 3 +/(*LIMIT_HEAP=123/use_length +Failed: error 160 at offset 16: (*VERB) not recognized or malformed + +/(*LIMIT_MATCH=/use_length +Failed: error 160 at offset 14: (*VERB) not recognized or malformed + +/(*CRLF)(*LIMIT_DEPTH=/use_length +Failed: error 160 at offset 21: (*VERB) not recognized or malformed + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length +Failed: error 160 at offset 34: (*VERB) not recognized or malformed + /\d{0,3}(*:abc)(?C1)xxx/callout_info Callout 1 x @@ -16145,7 +16623,7 @@ Failed: error -37: callout error code ------------------------------------------------------------------ Capture group count = 0 Options: extended_more -Starting code units: a b c +Starting code units: a b c Subject length lower bound = 1 /[a b c]/BxxxI @@ -16157,7 +16635,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capture group count = 0 Options: extended 
extended_more -Starting code units: a b c +Starting code units: a b c Subject length lower bound = 1 /[a b c]/B,extended_more @@ -16230,7 +16708,7 @@ Subject length lower bound = 1 # These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. /\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal -** Unrecognized modifier '\' in '\bad_escape_is_literal' +** Unrecognized modifier "\bad_escape_is_literal" /\N{\c/IB,bad_escape_is_literal ------------------------------------------------------------------ @@ -16254,10 +16732,10 @@ Subject length lower bound = 3 ------------------------------------------------------------------ /[Q-\N]/B,bad_escape_is_literal -Failed: error 150 at offset 5: invalid range in character class +Failed: error 171 at offset 5: \N is not supported in a class /[\s-_]/bad_escape_is_literal -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[_-\s]/bad_escape_is_literal Failed: error 150 at offset 5: invalid range in character class @@ -16348,7 +16826,7 @@ Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL Capture group count = 0 Max lookbehind = 1 Extra options: match_word -Starting code units: c d +Starting code units: c d Subject length lower bound = 3 the cat sat 0: cat @@ -16379,9 +16857,54 @@ No match syndicate No match -/a whole line/match_line,multiline - Rhubarb \na whole line\n custard - 0: a whole line +# Confirm that the pcre2_set_optimize API does not have any undesired effect on literal patterns +/(cat)|dog/I,literal,auto_possess_off +Capture group count = 0 +Options: literal +Optimizations: dotstar_anchor,start_optimize +First code unit = '(' +Last code unit = 'g' +Subject length lower bound = 9 + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + +/(cat)|dog/I,literal,dotstar_anchor_off +Capture group count = 0 +Options: literal +Optimizations: auto_possess,start_optimize +First code unit = '(' +Last code 
unit = 'g' +Subject length lower bound = 9 + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + +/(cat)|dog/I,literal,optimization_none +Capture group count = 0 +Options: literal +Optimizations: + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + +# These should result in errors, since it is not permitted to use the +# PCRE2_NO_AUTO_POSSESS and PCRE2_NO_DOTSTAR_ANCHOR options on a literal pattern +/(cat)|dog/literal,no_auto_possess +Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL + +/(cat)|dog/literal,no_dotstar_anchor +Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL + +/a whole line/match_line,multiline + Rhubarb \na whole line\n custard + 0: a whole line \= Expect no match Not a whole line No match @@ -16412,19 +16935,19 @@ No match No match /[[:digit:]-a]/ -Failed: error 150 at offset 10: invalid range in character class +Failed: error 150 at offset 11: invalid range in character class /[[:digit:]-[:print:]]/ -Failed: error 150 at offset 10: invalid range in character class +Failed: error 150 at offset 11: invalid range in character class /[\d-a]/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[\H-z]/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class /[\d-[:print:]]/ -Failed: error 150 at offset 3: invalid range in character class +Failed: error 150 at offset 4: invalid range in character class # Perl gets the second of these wrong, giving no match. 
@@ -16896,6 +17419,94 @@ Subject length lower bound = 1 1(2) Old 2 5 "abc" New 2 7 " STOPPED" 1: 12abc34xyz +/a(b)c/substitute_overflow_length,substitute_callout,replace=[1]12 + abc\=substitute_skip=1 +Failed: error -48: no more memory: 4 code units are needed + abc +Failed: error -48: no more memory: 4 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory: 4 code units are needed + abc + 1(2) Old 0 3 "abc" New 0 2 "12" +Failed: error -48: no more memory: 3 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory: 4 code units are needed + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]1234 + abc\=substitute_skip=1 +Failed: error -48: no more memory: 5 code units are needed + abc +Failed: error -48: no more memory: 5 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]1234 + abc\=substitute_skip=1 +Failed: error -48: no more memory: 5 code units are needed + abc +Failed: error -48: no more memory: 5 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]1234 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 4 "1234 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 4 "1234" +Failed: error -48: no more memory: 5 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[5]1234 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 4 "1234 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 4 "1234" + 1: 1234 + 
+/a(b)c/substitute_callout,replace=[1]12 + abc\=substitute_skip=1 +Failed: error -48: no more memory + abc +Failed: error -48: no more memory + +/a(b)c/substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory + abc + 1(2) Old 0 3 "abc" New 0 2 "12" +Failed: error -48: no more memory + +/a(b)c/substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + +/a(b)c/substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + /abc\rdef/ abc\ndef No match @@ -16908,13 +17519,13 @@ No match No match /(?(*ACCEPT)xxx)/ -Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) /(?(*atomic:xx)xxx)/ -Failed: error 128 at offset 10: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 10: atomic assertion expected after (?( or (?(?C) /(?(*script_run:xxx)zzz)/ -Failed: error 128 at offset 14: assertion expected after (?( or (?(?C) +Failed: error 128 at offset 14: atomic assertion expected after (?( or (?(?C) /foobar/ the foobar thing\=copy_matched_subject @@ -17006,14 +17617,14 @@ Subject length lower bound = 4 /(?|(a)|(bcde))(c)\2/I Capture group count = 2 Max back reference = 2 -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 3 /(?|(a)|(bcde))(c)\1/I Capture group count = 2 Max back reference = 1 -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 2 @@ -17025,7 +17636,7 @@ Named capture groups: A 3 B 2 Options: dupnames -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 3 @@ -17037,13 +17648,13 @@ Named capture groups: A 3 B 2 
Options: dupnames -Starting code units: a b +Starting code units: a b Last code unit = 'c' Subject length lower bound = 2 /((a|)+)+Z/I Capture group count = 2 -Starting code units: Z a +Starting code units: Z a Last code unit = 'Z' Subject length lower bound = 1 @@ -17123,7 +17734,7 @@ Failed: error 187 at offset 10: lookbehind assertion is too long # Expect error: not allowed as a condition /(?(*napla:xx)bc)/ -Failed: error 198 at offset 9: atomic assertion expected after (?( or (?(?C) +Failed: error 128 at offset 9: atomic assertion expected after (?( or (?(?C) /\A(*pla:.*\b(\w++))(?>.*?\b\1\b){3}/ word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 @@ -17224,7 +17835,7 @@ Capture group count = 0 May match empty string Compile options: Overall options: anchored -Starting code units: x y +Starting code units: x y Subject length lower bound = 1 /(*napla:abc|abd)/I @@ -17355,7 +17966,7 @@ Subject length lower bound = 1 a Ket Cond - 1 Cond ref + 1 Capture ref b Ket CBra 2 @@ -17373,7 +17984,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Bra Cond - 1 Cond ref + 1 Capture ref b Ket CBra 1 @@ -17481,8 +18092,7 @@ Subject length lower bound = 5 /(a)?a/I Capture group count = 1 -Starting code units: a -Last code unit = 'a' +First code unit = 'a' Subject length lower bound = 1 manm 0: a @@ -17618,6 +18228,23 @@ Failed: error -55 at offset 3 in replacement: requested value is not set XabcY\=replace= 0: abc +/abc/replace=\U$0,substitute_extended,substitute_case_callout + XabcY + 1: XBbKY +\= Expect not supported + XabcY\=null_context +** Replacement case callouts are not supported with null_context. + +/a/substitute_extended,substitute_case_callout + XaY\=replace=\U$0 + 1: XBY + XaY\=replace=\L$0 + 1: XaY + XaY\=replace=\u\L$0 + 1: XBY + XaY\=replace=\l\U$0 + 1: XaY + # Expect non-fixed-length error "(?<=X(?(DEFINE)(.*))(?1))." 
@@ -17932,9 +18559,9 @@ No match abc\x0adef 0: \x0a -# This test is currently broken in the interpreter -# /|a(?0)/endanchored -# aaaa +/|a(?0)/endanchored + aaaa + 0: aaaa /A +/extended @@ -17983,7 +18610,7 @@ No match /a?(?=b(*COMMIT)c|)d/I Capture group count = 0 -Starting code units: a d +Starting code units: a d Last code unit = 'd' Subject length lower bound = 1 bd @@ -17999,19 +18626,21 @@ Subject length lower bound = 1 /a?(?=b(*COMMIT)c|)d/I,no_start_optimize Capture group count = 0 Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor bd No match /(?=b(*COMMIT)c|)d/I,no_start_optimize Capture group count = 0 Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor bd No match /a?(?=bc|)d/I,auto_callout Capture group count = 0 Options: auto_callout -Starting code units: a d +Starting code units: a d Last code unit = 'd' Subject length lower bound = 1 bd @@ -18027,7 +18656,7 @@ Subject length lower bound = 1 /a?(?=bc|)\bd/I Capture group count = 0 Max lookbehind = 1 -Starting code units: a d +Starting code units: a d Last code unit = 'd' Subject length lower bound = 1 bd @@ -18058,8 +18687,3139 @@ Failed: error -47: match limit exceeded .abc def.. No match +/(*MARK:a/y_)/debug +** Unrecognized modifier 'y' in modifier string "y_)/debug" + +//i,sr +** Unrecognized modifier "sr" + +# The behaviour of these tests is different from Perl because PCRE2 doesn't +# recognize \Q or \E within a quantifier, so these examples are not treated +# as quantifiers. Subsequent processing of the string removes the escapes. + +/a{\Q1\E,2}/ + xa{1,2}x + 0: a{1,2} +\= Expect no match + xaax +No match + +/a{\E1,2}/ + xa{1,2}x + 0: a{1,2} +\= Expect no match + xaax +No match + +# -------------- + +/(?<=|b?)./B +------------------------------------------------------------------ + Bra + Assert back + Alt + VReverse + b? 
+ Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?=|b?)./B +------------------------------------------------------------------ + Bra + Assert + Alt + b?+ + Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?>|b?)./B +------------------------------------------------------------------ + Bra + Once + Alt + b?+ + Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?<=xy|a.b?|cd)/B +------------------------------------------------------------------ + Bra + Assert back + Reverse + xy + Alt + VReverse + a + Any + b? + Alt + Reverse + cd + Ket + Ket + End +------------------------------------------------------------------ + +# Tests for scan substring, a non Perl feature of PCRE2 + +# Parse errors first + +/(*scs:/ +Failed: error 114 at offset 6: missing closing parenthesis + +/(*scan_substring:(/ +Failed: error 114 at offset 18: missing closing parenthesis + +/(*scs:('name'/ +Failed: error 114 at offset 13: missing closing parenthesis + +/(*scs:(1)a|b)/ +Failed: error 115 at offset 7: reference to non-existent subpattern + +/(*scs:(0)a)/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/(*scan_substring:(1)a|b)/ +Failed: error 115 at offset 18: reference to non-existent subpattern + +/(*scs:()a|b)/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/(*scan_substring:()a|b)/ +Failed: error 115 at offset 19: reference to non-existent subpattern + +/()(*scs:(1)+a)/ +Failed: error 109 at offset 11: quantifier does not follow a repeatable item + +/()(*scs:(1,1,1,1,1,1,1,1,2))/ +Failed: error 115 at offset 25: reference to non-existent subpattern + +/()()(*scs:(1,2,1,2,1,2,2,'XYZ'))/ +Failed: error 115 at offset 26: reference to non-existent subpattern + +# Tests for iterating scan_substring + +/(a)(*scs:(1)b)*c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + 
Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)*?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)*+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)+?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)++c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Scan substring + 1 Capture ref + b + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)??c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + 
c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)?+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b){3}c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b){3,5}?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Braminzero + Bra + Scan substring + 1 Capture ref + b + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b){3,}+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(\w++)=(?(*scs:(1)(abc))pqr|xyz)(\w++)/ +Failed: error 128 at offset 14: atomic assertion expected after (?( or (?(?C) + +# Tests for scan_substring + +/([a-z]++)(*scs:(1)(stx)|(ne))(.)/B +------------------------------------------------------------------ + Bra + CBra 1 + [a-z]++ + Ket + Scan substring + 1 Capture ref + CBra 2 + stx + Ket + Alt + CBra 3 + ne + Ket + Ket + CBra 4 + Any + Ket + Ket + End 
+------------------------------------------------------------------ + ##string##next!## + 0: next! + 1: next + 2: + 3: ne + 4: ! + __aastxaa:__ + 0: stxaa: + 1: stxaa + 2: stx + 3: + 4: : + __abababab:__ +No match + +/(?[a-z]++)##(*scan_substring:('XX').*(..)$)\2/B +------------------------------------------------------------------ + Bra + CBra 1 + [a-z]++ + Ket + ## + Scan substring + 1 Capture ref + Any* + CBra 2 + Any + Any + Ket + $ + Ket + \2 + Ket + End +------------------------------------------------------------------ + ##abcd##abcd##cd## + 0: abcd##cd + 1: abcd + 2: cd + ##abcd##abcd##abcd## +No match + +/([a-z])([a-z]++)(#+)(*scs:(2)(ab.))/ + xab## +No match + yabc### + 0: yabc### + 1: y + 2: abc + 3: ### + 4: abc + zababc#### + 0: zababc#### + 1: z + 2: ababc + 3: #### + 4: aba + +/(?:(?[a-z]++)|(?[0-9]++)|$)(*scan_substring:('YYY')((?.).*\k$))/dupnames + $$abacd$$112345$$abca$$ + 0: abca + 1: abca + 2: + 3: abca + 4: a + $$abcdeaf$$1234567819$$123456781$$ + 0: 123456781 + 1: + 2: 123456781 + 3: 123456781 + 4: 1 + +/([a-zA-Z]+)(*scs:(1).*?(?[A-Z]+)(*scan_substring:('ABC').*(.)\3))#+/ + ##abABCtuTUVXz##abCDEFGxyCDEEFGhi## + 0: abCDEFGxyCDEEFGhi## + 1: abCDEFGxyCDEEFGhi + 2: CDEEFG + 3: E + ##abAABCtuTUVXXz!!abCDEFGxyCDEFGGhi## + 0: abCDEFGxyCDEFGGhi## + 1: abCDEFGxyCDEFGGhi + 2: CDEFGG + 3: G + +/([a-zA-Z]+)(*scs:(1)(xy|ab(*ACCEPT)cd))/B +------------------------------------------------------------------ + Bra + CBra 1 + [A-Za-z]+ + Ket + Scan substring + 1 Capture ref + CBra 2 + xy + Alt + ab + Close 2 + *ASSERT_ACCEPT + cd + Ket + Ket + Ket + End +------------------------------------------------------------------ + ##cdefgh##cdeabxy## + 0: abxy + 1: abxy + 2: ab + +/(?[a-zA-Z]+)(*scs:('AA')(ab(*ACCEPT)cd|xy))/B +------------------------------------------------------------------ + Bra + CBra 1 + [A-Za-z]+ + Ket + Scan substring + 1 Capture ref + CBra 2 + ab + Close 2 + *ASSERT_ACCEPT + cd + Alt + xy + Ket + Ket + Ket + End 
+------------------------------------------------------------------ + ##cdefgh##cdeabxy## + 0: abxy + 1: abxy + 2: ab + +/([a-z]++)##(*scs:(1)(abc))?!/ + ##xyz##abc##! + 0: abc##! + 1: abc + 2: abc + ##xyz##! + 0: xyz##! + 1: xyz + ##xyz## +No match + +/([a-z]++)##(*scs:(1)(abc))??(?(2)!|:)/ + ##abc##abc##! + 0: abc##! + 1: abc + 2: abc + ##abc##xyz##: + 0: xyz##: + 1: xyz + ##abc### +No match + +/([a-z]++)##(*scs:(1)(abc)|xyz){8}(?(2)!|:)/ + ##abc##abc##! + 0: abc##! + 1: abc + 2: abc + ##abc##xyz##: + 0: xyz##: + 1: xyz + ##nnn##! +No match + ##nnn##: +No match + +/[A-Z]{3}([A-Z]++)#(*scs:(1)(?<=BC)XY)#/ + ABCXY##AKCXY## + 0: ABCXY## + 1: XY + +/()(\w++)=(*scs:(2)(?=abc))(\w++)/ + xabcx=pqr. + 0: abcx=pqr + 1: + 2: abcx + 3: pqr + +/(\d++)(*scs:(1)\d+\z)(\w+)/ + X123XYZ + 0: 123XYZ + 1: 123 + 2: XYZ + +/(\d++)(*scs:(1)\d+\Z)(\w+)/ + X123XYZ + 0: 123XYZ + 1: 123 + 2: XYZ + +/(\d++)(*scs:(1)\d+$)(\w+)/ + X123XYZ + 0: 123XYZ + 1: 123 + 2: XYZ + +/([a-z]{2})[a-z](*scs:(1)(.*?))\2$/ + abcab + 0: abcab + 1: ab + 2: ab + abcabc + 0: bcabc + 1: bc + 2: bc + +/^(([a-z]([a-z]*+))(*scs:(2).(?=(?1)|$)\3)|#){5}/ + abcdefg#hijk#! 
+No match + abcdefg#hijk#lmnopqr# + 0: abcdefg#hijk#lmnopqr + 1: lmnopqr + 2: lmnopqr + 3: mnopqr + +/(*scs:(1)a)(a)|x/ + a +No match + x + 0: x + +/(*scs:()a)(?a)(?b)(?c)(?d)|x/dupnames + abcd +No match + x + 0: x + +/(*scs:(1)a)?(a)/ + b +No match + a + 0: a + 1: a + +/(*scs:(1)a)??(a)/ + b +No match + a + 0: a + 1: a + +# Custom backtrack, goes back n - 1 characters in the input (n=8) +/x(?|(*scs:(1)(?<=(.)))|()){8}/ + abcdefghx + 0: x + 1: c + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE)x)).+|(.+)/ + abcdef + 0: bcdef + 1: + 2: + 3: bcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr)x)).+|(.+)/mark + abcdef + 0: bcdef + 1: + 2: + 3: bcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr))).+|(.+)/mark + abcdef + 0: abcdef + 1: a + 2: b +MK: markstr + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT)x)).+|(.+)/ + abcdef +No match + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr)x)).+|(.+)/mark + abcdef +No match, mark = markstr + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr))).+|(.+)/mark + abcdef + 0: abcdef + 1: a + 2: b +MK: markstr + +/(abc)(def)(*scs:(1)(*scs:(2)de(*SKIP)x)).+|(.+)/ + abcdefghi + 0: fghi + 1: + 2: + 3: fghi + +/(abc)(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(.+)/ + abcdefghi + 0: bcdefghi + 1: + 2: + 3: bcdefghi + +/(?<=(abc))(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(ef.+)/ + abcdefghi + 0: efghi + 1: + 2: + 3: efghi + +/(abc)(def)(*scs:(2)(?:(*scs:(1)abc(*SKIP:notfound)x|abcd|(abc)))).+/ + abcdefghi + 0: abcdefghi + 1: abc + 2: def + 3: abc + +/(abc)(def)(*MARK:markstr)(*scs:(2)(?:(*scs:(1)abc(*SKIP:markstr)x))).+|(.+)/ + abcdefghi + 0: ghi + 1: + 2: + 3: ghi + +/^([a-z]++)(?:((?6))|((?7))|((?8))|(#))(?(DEFINE)((*scs:(1)abc(*PRUNE)d))((*scs:(1)abc(*COMMIT)e))((*scs:(1)abc(*SKIP)f)))/ + abcd# + 0: abcd + 1: abcd + 2: + abce# + 0: abce + 1: abce + 2: + 3: + abcf# + 0: abcf + 1: abcf + 2: + 3: + 4: + abc# + 0: abc# + 1: abc + 2: + 3: + 4: + 5: # + +/\b(\w++)(*scs:(1)^)/ + sausages and mash + 0: sausages + 1: sausages +\= Expect no match + !sausages and mash +No match + 
+/(\b\w{3,}+\b)(*scs:(1)\W*+(?:((.)\W*+(?2)\W*+\3|)|((.)\W*+(?4)\W*+\5|\W*+.\W*+))\W*+$)/ig + ipsum lorem revel level able was I ere I saw Elba + 0: level + 1: level + 2: + 3: + 4: level + 5: l + 0: ere + 1: ere + 2: + 3: + 4: ere + 5: e + +/(?:(?'A'a)|(?b))(*scs:('A')b)c/dupnames + abc + 0: bc + 1: + 2: b + +# Relative reference +/(xyz)(abc)(*scs:(-1)abc)(*scs:(-2)\1)/ + >xyzabc< + 0: xyzabc + 1: xyz + 2: abc + +/^([a-z]++)#(*scs:(1)a|ab|abc|abcd|abcde|abcdef|(abcdefg))\2/ + abcdefg#abcdefg + 0: abcdefg#abcdefg + 1: abcdefg + 2: abcdefg + +/^([a-z]++)(*scs:(1)(a+)(*THEN)b|(a+)(*THEN)c|(aa))/ + aaaax + 0: aaaax + 1: aaaax + 2: + 3: + 4: aa + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b)|(a+)(*THEN)c|(aa))/ + aaaax + 0: aaaax + 1: aaaax + 2: + 3: + 4: + 5: aa + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b))?/ + aaaax + 0: aaaax + 1: aaaax + +/^([a-z]++)(*scs:(1)(abc|(a+)(*THEN)b))?/ + aaaax + 0: aaaax + 1: aaaax + +/^(?:(.){20,30}#|([a-z]++)(*scs:(1)(a+)(*THEN)b){20,30}#|(.){20,30}!)/ + aaaaaaaaaaaaaaaaaaaaaaaaab! + 0: aaaaaaaaaaaaaaaaaaaaaaaaab! 
+ 1: + 2: + 3: + 4: b + +# List of captures + +/(?:(abc)|(?def)|ghi)(*scs:(1,'PP').(.))/B +------------------------------------------------------------------ + Bra + Bra + CBra 1 + abc + Ket + Alt + CBra 2 + def + Ket + Alt + ghi + Ket + Scan substring + 1 Capture ref + 2 Capture ref + Any + CBra 3 + Any + Ket + Ket + Ket + End +------------------------------------------------------------------ + abc + 0: abc + 1: abc + 2: + 3: b + def + 0: def + 1: + 2: def + 3: e + ghi +No match + +/(?:(?abc)|(?def)|(ghi)|(?'NN'jkl)|mno)(*scs:('MM',3,).(.))/B,dupnames +------------------------------------------------------------------ + Bra + Bra + CBra 1 + abc + Ket + Alt + CBra 2 + def + Ket + Alt + CBra 3 + ghi + Ket + Alt + CBra 4 + jkl + Ket + Alt + mno + Ket + Scan substring + Capture ref 2 + 3 Capture ref + 4 Capture ref + Any + CBra 5 + Any + Ket + Ket + Ket + End +------------------------------------------------------------------ + abc + 0: abc + 1: abc + 2: + 3: + 4: + 5: b + def + 0: def + 1: + 2: def + 3: + 4: + 5: e + ghi + 0: ghi + 1: + 2: + 3: ghi + 4: + 5: h + jkl + 0: jkl + 1: + 2: + 3: + 4: jkl + 5: k + mno +No match + +/f(?:(*scs:(+1,+2)(?<=(.)))|()){16}/ + 1234567890abcdef + 0: f + 1: 2 + 2: + 1ffffffffffffff + 0: f + 1: 1 + 2: + +/(?a)(*scan_substring:(1,'AA',1,)a)b/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + 1 Capture ref + 1 Capture ref + 1 Capture ref + a + Ket + b + Ket + End +------------------------------------------------------------------ + ab + 0: ab + 1: a + ac +No match + +/()()()(?<=ab(*scs:(1,2,3))cd)xyz/ + abcdxyz + 0: xyz + 1: + 2: + 3: + +/()()()(?<=ab(*ACCEPT)(*scs:(1,2,3))cd|efg)xyz/ + abxyz + 0: xyz + 1: + 2: + 3: + efgxyz + 0: xyz + 1: + 2: + 3: + +# Tests for pcre2_set_optimize() + +/abc/I,optimization_none +Capture group count = 0 +Optimizations: + +/abc/I,optimization_none,auto_possess +Capture group count = 0 +Optimizations: auto_possess + 
+/abc/I,optimization_none,dotstar_anchor,auto_possess +Capture group count = 0 +Optimizations: auto_possess,dotstar_anchor + +/abc/I,optimization_none,start_optimize +Capture group count = 0 +Optimizations: start_optimize +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc/I,dotstar_anchor_off,optimization_full +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +# If pcre2_set_optimize() is used to turn on some optimization, but at the same time, +# the compile options word turns it off... the compile options word "wins": + +/abc/I,no_auto_possess,auto_possess +Capture group count = 0 +Options: no_auto_possess +Optimizations: dotstar_anchor,start_optimize +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc/I,no_dotstar_anchor,dotstar_anchor +Capture group count = 0 +Options: no_dotstar_anchor +Optimizations: auto_possess,start_optimize +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc/I,no_start_optimize,start_optimize +Capture group count = 0 +Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor + +# -------------- + +# larger than GROUP_MAX, smaller than INT_MAX +/a\800000b/ +Failed: error 161 at offset 8: subpattern number is too big + +# coming up on INT_MAX... 
(used to succeed with \8 being literal 8) +/a\800000000b/ +Failed: error 161 at offset 11: subpattern number is too big + +# over INT_MAX (used to succeed with \8 being literal 8) +/a\8000000000b/ +Failed: error 161 at offset 12: subpattern number is too big + +# -------------- + +# no_bs0 + +/a\0b\x00c\00d/ + a\x{00}b\x{00}c\x{00}d + 0: a\x00b\x00c\x00d + +/a\0b/no_bs0 +Failed: error 198 at offset 3: octal digit missing after \0 (PCRE2_EXTRA_NO_BS0 is set) + +/b\x00c\00d/no_bs0 + b\x{00}c\x{00}d + 0: b\x00c\x00d + +/abc/substitute_extended + abc\=replace=a\0b\x00c\00d + 1: a\x00b\x00c\x00d + +/abc/substitute_extended,no_bs0 + abc\=replace=a\0b +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + abc\=replace=b\x00c\00d + 1: b\x00c\x00d + +# python_octal + +/\0-\00-\01-\012-\0123-\123-\1234/ + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + 0: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + +/\1/ +Failed: error 115 at offset 1: reference to non-existent subpattern + +/\12/ + \o{12} + 0: \x0a + +/abc/substitute_extended + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + 1: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + abc\=replace=\1 +Failed: error -49 at offset 2 in replacement: unknown substring + abc\=replace=\12 + 1: \x0a + +/\0-\00-\01-\012-\0123-\123-\1234/python_octal + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + 0: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + +/\1/python_octal +Failed: error 115 at offset 1: reference to non-existent subpattern + +/\12/python_octal +Failed: error 115 at offset 2: reference to non-existent subpattern + +/abc/substitute_extended,python_octal + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + 1: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + abc\=replace=\1 +Failed: error -49 at offset 2 in replacement: unknown substring + abc\=replace=\12 +Failed: error -49 at offset 3 in replacement: unknown substring + +# -------------- + +/a(?C)b/ + abc +--->abc + 0 ^^ b + 0: ab + abc\=callout_none + 0: ab + +/a(?C)b/never_callout +Failed: error 203 at 
offset 3: using callouts is disabled by the application + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +/[a[]/ + [ + 0: [ + +/[a[]/alt_extended_class +Failed: error 106 at offset 4: missing terminating ] for character class + +/[a[B]/alt_extended_class +Failed: error 212 at offset 5: missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS) + +/[a[B]]C/B,alt_extended_class +------------------------------------------------------------------ + Bra + [Ba] + C + Ket + End +------------------------------------------------------------------ + aC + 0: aC + BC + 0: BC +\= Expect no match + [C +No match + +/[[A][B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AB] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B +\= Expect no match + [ +No match + ] +No match + +/[[A]||[B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AB] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B +\= Expect no match + C +No match + +/[[^A][B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^A] + Ket + End +------------------------------------------------------------------ + B + 0: B + C + 0: C +\= Expect no match + A +No match + +/[^[A][B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^AB] + Ket + End +------------------------------------------------------------------ + C + 0: C +\= Expect no match + A +No match + B +No match + +/[^[A]&&[B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B + C + 0: C + +/[[AC]||[BC]]/B,alt_extended_class 
+------------------------------------------------------------------ + Bra + [A-C] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B + C + 0: C +\= Expect no match + D +No match + +/[[AC]&&[BC]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [C] + Ket + End +------------------------------------------------------------------ + C + 0: C +\= Expect no match + A +No match + B +No match + D +No match + +/[[AC]--[BC]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + C +No match + D +No match + +/[[AC]~~[BC]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AB] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B +\= Expect no match + C +No match + D +No match + +/[A[]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A\]] + Ket + End +------------------------------------------------------------------ + A + 0: A + ] + 0: ] +\= Expect no match + [ +No match + +/[A[^]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^\]] + Ket + End +------------------------------------------------------------------ + A + 0: A + [ + 0: [ + C + 0: C +\= Expect no match + ] +No match + +/[A[]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + ] +No match + [ +No match + +/[A[^]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End 
+------------------------------------------------------------------ + A + 0: A + C + 0: C + [ + 0: [ + ] + 0: ] + +/[A-C--B]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AC] + Ket + End +------------------------------------------------------------------ + A + 0: A + C + 0: C +\= Expect no match + B +No match + +/[^A-C--B]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^AC] + Ket + End +------------------------------------------------------------------ + B + 0: B +\= Expect no match + A +No match + C +No match + +/[[\d\D]--b]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^b] + Ket + End +------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + b +No match + +/[\dAC-E[:space:]&&[^z]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\x09-\x0d 0-9AC-E] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + A + 0: A + C + 0: C + D + 0: D + E + 0: E + \t + 0: \x09 +\= Expect no match + B +No match + F +No match + ; +No match + +/[z||[^\dAC-E[:space:]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^\x09-\x0d 0-9AC-E] + Ket + End +------------------------------------------------------------------ + z + 0: z + B + 0: B + F + 0: F + ; + 0: ; +\= Expect no match + 0 +No match + A +No match + C +No match + D +No match + E +No match + \t +No match + +/[ab||cd]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-d] + Ket + End +------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + e +No match + +/[[a]b||[c]d]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-d] + Ket + End 
+------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + e +No match + +/[a[b]||c[d]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-d] + Ket + End +------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + e +No match + +/[-&&-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-] + Ket + End +------------------------------------------------------------------ + - + 0: - +\= Expect no match + a +No match + +/[a-&&-a]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/[-a&&a-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/[[a]-&&-[a]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/[-[a]&&[a]-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/(?xx:[ ^ a[ ^ b] ])/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + [b] + Ket + Ket + End +------------------------------------------------------------------ + b + 0: b +\= Expect no match + A +No match + a +No match + c +No match + +/[ ^ a[ ^ b] ]/B,alt_extended_class 
+------------------------------------------------------------------ + Bra + [ ^ab] + Ket + End +------------------------------------------------------------------ + \x20 + 0: + ^ + 0: ^ + a + 0: a + b + 0: b +\= Expect no match + c +No match + +/[a-c--b]+/B,alt_extended_class +------------------------------------------------------------------ + Bra + [ac]++ + Ket + End +------------------------------------------------------------------ + ac + 0: ac + a + 0: a +\= Expect no match + b +No match + +/[a-c--b]{2,3}/B,alt_extended_class +------------------------------------------------------------------ + Bra + [ac]{2,3}+ + Ket + End +------------------------------------------------------------------ + ac + 0: ac + cac + 0: cac +\= Expect no match + a +No match + bb +No match + +/x[a-c--b]+y/B,alt_extended_class +------------------------------------------------------------------ + Bra + x + [ac]++ + y + Ket + End +------------------------------------------------------------------ + xacy + 0: xacy + xaay + 0: xaay + xay + 0: xay +\= Expect no match + zacy +No match + xacz +No match + xy +No match + xby +No match + +/[A--B--C--D]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + +/[A--A--A]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [] + Ket + End +------------------------------------------------------------------ +\= Expect no match + A +No match + B +No match + +/[[A--A]--A]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [] + Ket + End +------------------------------------------------------------------ +\= Expect no match + A +No match + B +No match + +/[A--[A--A]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End 
+------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + +/[A--^B]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + ^ +No match + z +No match + +/([a-z--n])\1/B,alt_extended_class +------------------------------------------------------------------ + Bra + CBra 1 + [a-mo-z] + Ket + \1 + Ket + End +------------------------------------------------------------------ + aa + 0: aa + 1: a + zz + 0: zz + 1: z +\= Expect no match + az +No match + nn +No match + +/(x[a-z--n]y)\1/B,alt_extended_class +------------------------------------------------------------------ + Bra + CBra 1 + x + [a-mo-z] + y + Ket + \1 + Ket + End +------------------------------------------------------------------ + xayxay + 0: xayxay + 1: xay + xzyxzy + 0: xzyxzy + 1: xzy +\= Expect no match + xnyxny +No match + +/(?:_\1|([a-z--n])){2}/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + Ket + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + Ket + Ket + End +------------------------------------------------------------------ + a_a + 0: a_a + 1: a + z_z + 0: z_z + 1: z +\= Expect no match + a_z +No match + n_n +No match + +/(?:_\1|([a-z--n]))+/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + KetRmax + Ket + End +------------------------------------------------------------------ + a_a + 0: a_a + 1: a + z_z + 0: z_z + 1: z + a_partial + 0: a + 1: a +\= Expect no match + n_n +No match + +/[\d-[z]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-0-9z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 
+ - + 0: - + z + 0: z + +/[\d-||z]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-0-9z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[z[\d-]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-0-9z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[1-[z]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-1z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[1-||z]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-1z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[z[1-]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-1z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[a--/alt_extended_class +Failed: error 106 at offset 4: missing terminating ] for character class + +/[a--a/alt_extended_class +Failed: error 106 at offset 5: missing terminating ] for character class + +/[a--[a/alt_extended_class +Failed: error 106 at offset 6: missing terminating ] for character class + +/[a--[a]/alt_extended_class +Failed: error 212 at offset 7: missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS) + +/[a--[a]--/alt_extended_class +Failed: error 212 at offset 9: missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS) + +/[a--]/alt_extended_class +Failed: error 210 at offset 5: expected operand after operator in extended character class + +/[--a]/alt_extended_class +Failed: error 209 at 
offset 3: unexpected operator in extended character class (no preceding operand) + +/[^--a]/alt_extended_class +Failed: error 209 at offset 4: unexpected operator in extended character class (no preceding operand) + +/[--]/alt_extended_class +Failed: error 209 at offset 3: unexpected operator in extended character class (no preceding operand) + +/[a---b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a----b]/alt_extended_class +Failed: error 208 at offset 6: invalid operator in extended character class + +/[a&&&b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a|||b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a~~~b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a~~~~b]/alt_extended_class +Failed: error 208 at offset 6: invalid operator in extended character class + +/[a~~/alt_extended_class +Failed: error 106 at offset 4: missing terminating ] for character class + +/[a~~~/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a~~~~/alt_extended_class +Failed: error 208 at offset 6: invalid operator in extended character class + +/[a||b&&c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a||b~~c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a~~b&&c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a--b~~c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a--b&&c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify 
operator precedence in extended character class + +/[a||b--c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a||[b--c]]/alt_extended_class + a + 0: a + b + 0: b +\= Expect no match + c +No match + +/[\d-z]/B,alt_extended_class +Failed: error 150 at offset 4: invalid range in character class + +/[z-\d]/B,alt_extended_class +Failed: error 150 at offset 5: invalid range in character class + +/[abc -- b]+/B,alt_extended_class +------------------------------------------------------------------ + Bra + [ac]++ + Ket + End +------------------------------------------------------------------ + acacbac + 0: acac + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a]]]]]]]]]]]]]]]/alt_extended_class + a + 0: a +\= Expect no match + b +No match + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[z]&&a]]]]]]]]]]]]]]]/alt_extended_class +Failed: error 207 at offset 115: extended character class nesting is too deep + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a[z]]]]]]]]]]]]]]]]/alt_extended_class +Failed: error 207 at offset 118: extended character class nesting is too deep + +/[z&/alt_extended_class +Failed: error 106 at offset 3: missing terminating ] for character class + +/[[^]~~[^]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [] + Ket + End +------------------------------------------------------------------ +\= Expect no match + a +No match + +/[^[[^]~~[^]]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + a + 0: a + +# -------------- + +# EXTENDED CHARACTER 
CLASSES (Perl) + +# allow-empty-class does nothing inside (?[...]) +/(?[ []] ])/B,allow_empty_class +------------------------------------------------------------------ + Bra + [\]] + Ket + End +------------------------------------------------------------------ + ] + 0: ] + +# bad-escape-is-literal does nothing inside (?[...]) +/[ \j ]/ +Failed: error 103 at offset 3: unrecognized character follows \ + +/[ /\ +Failed: error 101 at offset 3: \ at end of pattern + +/(?[ \j ])/ +Failed: error 103 at offset 5: unrecognized character follows \ + +/(?[ /\ +Failed: error 101 at offset 5: \ at end of pattern + +/[ \j ]/bad_escape_is_literal + j + 0: j +\= Expect no match + k +No match + +/[ /\bad_escape_is_literal +Failed: error 106 at offset 3: missing terminating ] for character class + +/(?[ \j ])/bad_escape_is_literal +Failed: error 103 at offset 5: unrecognized character follows \ + +/(?[ /\bad_escape_is_literal +Failed: error 101 at offset 5: \ at end of pattern + +/(?[ [\j] ])/bad_escape_is_literal +Failed: error 103 at offset 6: unrecognized character follows \ + +/(?[ (\j) ])/bad_escape_is_literal +Failed: error 103 at offset 6: unrecognized character follows \ + +# We can't test error cases in testinput1 + +/(?[])/ +Failed: error 214 at offset 4: empty expression in extended character class + +/(?[/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/(?[]/ +Failed: error 214 at offset 4: empty expression in extended character class + +/(?[\n/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/(?[\n]/ +Failed: error 215 at offset 6: terminating ] with no following closing parenthesis in (?[...] + +/(?[\n]z)/ +Failed: error 215 at offset 6: terminating ] with no following closing parenthesis in (?[...] + +/(?[\n] )/ +Failed: error 215 at offset 6: terminating ] with no following closing parenthesis in (?[...] 
+ +/(?[(/ +Failed: error 114 at offset 4: missing closing parenthesis + +/(?[( / +Failed: error 106 at offset 5: missing terminating ] for character class + +/(?[(\n/ +Failed: error 106 at offset 6: missing terminating ] for character class + +/(?[ \n + () ])/ +Failed: error 214 at offset 11: empty expression in extended character class + +/(?[1])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[a])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[a-c])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[(])/ +Failed: error 114 at offset 4: missing closing parenthesis + +/(?[(\n])/ +Failed: error 114 at offset 6: missing closing parenthesis + +/(?[\n)])/ +Failed: error 122 at offset 6: unmatched closing parenthesis + +/(?[^\n])/ +Failed: error 209 at offset 4: unexpected operator in extended character class (no preceding operand) + +/(?[ \n \t ])/ +Failed: error 213 at offset 9: unexpected expression in extended character class (no preceding operator) + +/(?[ \d \t ])/ +Failed: error 213 at offset 9: unexpected expression in extended character class (no preceding operator) + +/(?[ [\n] \t ])/ +Failed: error 213 at offset 11: unexpected expression in extended character class (no preceding operator) + +/(?[ (\n) \t ])/ +Failed: error 213 at offset 11: unexpected expression in extended character class (no preceding operator) + +/(?[ [:alpha:] \t ])/ +Failed: error 213 at offset 16: unexpected expression in extended character class (no preceding operator) + +/(?[ \n + \t \d ])/ +Failed: error 213 at offset 14: unexpected expression in extended character class (no preceding operator) + +/(?[ !\n \t ])/ +Failed: error 213 at offset 10: unexpected expression in extended character class (no preceding operator) + +/(?[ \n [:alpha:] ])/ +Failed: error 213 at offset 16: unexpected expression in extended character class (no preceding 
operator) + +/(?[ \n [\d] ])/ +Failed: error 213 at offset 8: unexpected expression in extended character class (no preceding operator) + +/(?[ \n (\t) ])/ +Failed: error 213 at offset 8: unexpected expression in extended character class (no preceding operator) + +/(?[ \n !\t ])/ +Failed: error 213 at offset 8: unexpected expression in extended character class (no preceding operator) + +/(?[ \n \t ])/ +Failed: error 213 at offset 9: unexpected expression in extended character class (no preceding operator) + +/(?[:graph:])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[\Qn\E])/ +Failed: error 216 at offset 6: unexpected character in (?[...]) extended character class + +# maximum depth tests + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&\n))))))))))))))])/ + \n + 0: \x0a +\= Expect no match + a +No match + b +No match + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+([\n]&\n))))))))))))))])/ +Failed: error 207 at offset 158: extended character class nesting is too deep + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&[\n]))))))))))))))])/ +Failed: error 207 at offset 161: extended character class nesting is too deep + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+((\n)&\n))))))))))))))])/ +Failed: error 207 at offset 158: extended character class nesting is too deep + +# -------------- + +/[[:digit:] -Z]/xx +Failed: error 150 at offset 14: invalid range in character class + +/[\d -Z]/xx +Failed: error 150 at offset 7: invalid range in character class + 
+/[[:digit:]\E-H]/ +Failed: error 150 at offset 13: invalid range in character class + +/[[:digit:]\Q\E-H]+/ +Failed: error 150 at offset 15: invalid range in character class + +/[z-[:space:]]/ +Failed: error 150 at offset 12: invalid range in character class + +/[z-\d]/ +Failed: error 150 at offset 5: invalid range in character class + +/[[:space:]-z]/ +Failed: error 150 at offset 11: invalid range in character class + +/[\d-z]/ +Failed: error 150 at offset 4: invalid range in character class + +/[\d-\w]/ +Failed: error 150 at offset 4: invalid range in character class + +/[\Q/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/[\Q/\ +Failed: error 106 at offset 4: missing terminating ] for character class + +/[\Q\E/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[\Q\n/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[\Q\n]/ +Failed: error 106 at offset 6: missing terminating ] for character class + +/[\Q\n/\ +Failed: error 106 at offset 6: missing terminating ] for character class + +/[\Q\n\]/ +Failed: error 106 at offset 7: missing terminating ] for character class + +/[\Q\n\E/ +Failed: error 106 at offset 7: missing terminating ] for character class + +/[\Q\n\E]/ + \\ + 0: \ + n + 0: n +\= Expect no match + \n +No match + Q +No match + +/[z\Q/ +Failed: error 106 at offset 4: missing terminating ] for character class + +/[z\Q/\ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[z\Q\E/ +Failed: error 106 at offset 6: missing terminating ] for character class + +/[/\ +Failed: error 101 at offset 2: \ at end of pattern + +/[\n/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/[\E/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/[\^z]/B +------------------------------------------------------------------ + Bra + [\^z] + Ket + End 
+------------------------------------------------------------------ + +/[ \^]/B +------------------------------------------------------------------ + Bra + [ ^] + Ket + End +------------------------------------------------------------------ + +/[\\z]/B +------------------------------------------------------------------ + Bra + [\\z] + Ket + End +------------------------------------------------------------------ + +/[0-z]/B +------------------------------------------------------------------ + Bra + [0-z] + Ket + End +------------------------------------------------------------------ + +/[0\-z]/B +------------------------------------------------------------------ + Bra + [\-0z] + Ket + End +------------------------------------------------------------------ + +/[]z]/B +------------------------------------------------------------------ + Bra + [\]z] + Ket + End +------------------------------------------------------------------ + +/[ \]]/B +------------------------------------------------------------------ + Bra + [ \]] + Ket + End +------------------------------------------------------------------ + +/[ --]/B +------------------------------------------------------------------ + Bra + [ -\-] + Ket + End +------------------------------------------------------------------ + +/[A-\]]/B +------------------------------------------------------------------ + Bra + [A-\]] + Ket + End +------------------------------------------------------------------ + +/[A-\\]/B +------------------------------------------------------------------ + Bra + [A-\\] + Ket + End +------------------------------------------------------------------ + +/[\A]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\Z]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\z]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\G]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + 
+/[\K]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\g<1>]/ + < + 0: < + g + 0: g +\= Expect no match + \\ +No match + +/[\k<1>]/ + < + 0: < + k + 0: k +\= Expect no match + \\ +No match + +/[\u{ 1z}]/alt_bsux,extra_alt_bsux + u + 0: u + { + 0: { + } + 0: } + \x20 + 0: + 1 + 0: 1 +\= Expect no match + \\ +No match + +/[a\x{e1}]/iB +------------------------------------------------------------------ + Bra + [Aa\xe1] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A + \x{e1} + 0: \xe1 + +# -------------- + +# Attempt at full coverage of the substitution buffer-management code - not +# just covering each line in each macro, but covering each instantiation of each +# line in those macros. + +# +# CHECKMEMCPY tests +# +# Four conditions for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Additionally some CHECKMEMCPYs have a substitute_replacement_only branch. 
+# + +# pre-start-offset fragment +# no "overflow after previous overflow" condition +/a/ + XYaZ\=offset=2,replace=foo + 1: XYfooZ + XYaZ\=offset=2,replace=[1]foo +Failed: error -48: no more memory + XYaZ\=offset=2,substitute_overflow_length,replace=[1]foo +Failed: error -48: no more memory: 7 code units are needed + XYaZ\=offset=2,substitute_replacement_only,replace=foo + 1: foo + +# pre-match fragment +/a/ + XYaZ\=replace=foo + 1: XYfooZ + XYaZ\=replace=[1]foo +Failed: error -48: no more memory + XYaZ\=substitute_overflow_length,replace=[1]foo +Failed: error -48: no more memory: 7 code units are needed + XXYaZ\=offset=2,substitute_overflow_length,replace=[1]foo +Failed: error -48: no more memory: 8 code units are needed + XYaZ\=substitute_replacement_only,replace=foo + 1: foo + +# empty match bumpalong +/(?<=abc)(|DEF)/g + abcDEFabcZ\=replace=+ + 3: abc++abc+Z + abcDEFabcZ\=replace=[5]+ +Failed: error -48: no more memory + abcDEFabcZ\=substitute_overflow_length,replace=[5]+ +Failed: error -48: no more memory: 11 code units are needed + abcDEFabcZ\=replace=[9]+ +Failed: error -48: no more memory + abcDEFabcZ\=substitute_overflow_length,replace=[9]+ +Failed: error -48: no more memory: 11 code units are needed + abcDEFabcZ\=substitute_overflow_length,replace=[1]+ +Failed: error -48: no more memory: 11 code units are needed + abcDEFabcZ\=substitute_replacement_only,replace=+ + 3: +++ + +# literal replacement +/a/ + XYaZ\=substitute_literal,replace=$0 + 1: XY$0Z + XYaZ\=substitute_literal,replace=[3]$0 +Failed: error -48: no more memory + XYaZ\=substitute_literal,substitute_overflow_length,replace=[3]$0 +Failed: error -48: no more memory: 6 code units are needed + XYaZ\=substitute_literal,substitute_overflow_length,replace=[1]$0 +Failed: error -48: no more memory: 6 code units are needed + +# a MARK +/(*:pear)apple/ + XappleY\=replace=${*MARK} + 1: XpearY + XappleY\=replace=[3]${*MARK} +Failed: error -48: no more memory + 
XappleY\=substitute_overflow_length,replace=[3]${*MARK} +Failed: error -48: no more memory: 7 code units are needed + XXappleY\=substitute_overflow_length,replace=[1]${*MARK} +Failed: error -48: no more memory: 8 code units are needed + +# a subject fragment +/a(bb)c/ + XabbcY\=replace=$1 + 1: XbbY + XabbcY\=replace=[2]$1 +Failed: error -48: no more memory + XabbcY\=substitute_overflow_length,replace=[2]$1 +Failed: error -48: no more memory: 5 code units are needed + XXabbcY\=substitute_overflow_length,replace=[1]$1 +Failed: error -48: no more memory: 6 code units are needed + +# a zero-length subject fragment +/a()c/ + XacY\=replace=$1 + 1: XY + XacY\=replace=[2]$1 +Failed: error -48: no more memory + XacY\=substitute_overflow_length,replace=[2]$1 +Failed: error -48: no more memory: 3 code units are needed + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\x{48} + 1: XHY + XabcY\=replace=[1]\x{48} +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]\x{48} +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]\x{48} +Failed: error -48: no more memory: 5 code units are needed + +# a replacement literal character +/abc/ + XabcY\=replace=Z + 1: XZY + XabcY\=replace=[1]Z +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 5 code units are needed + +# a cancelled substitution +# no "overflow after previous overflow" condition +/abc/substitute_skip=1 + XabcY\=replace=Z + 1(1) Old 1 4 "abc" New 1 2 "Z SKIPPED" + 1: XabcY + XabcY\=replace=[3]Z + 1(1) Old 1 4 "abc" New 1 2 "Z SKIPPED" +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[3]Z + 1(1) Old 1 4 "abc" New 1 2 "Z SKIPPED" +Failed: error -48: no more memory: 6 code units are needed + 
XabcY\=substitute_replacement_only,replace=Z + 1(1) Old 1 4 "abc" New 0 1 "Z SKIPPED" + 1: + +# the rest of the subject +/abc/ + XabcYY\=replace=Z + 1: XZYY + XabcYY\=replace=[3]Z +Failed: error -48: no more memory + XabcYY\=substitute_overflow_length,replace=[3]Z +Failed: error -48: no more memory: 5 code units are needed + XabcYY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 5 code units are needed + XabcYY\=substitute_replacement_only,replace=Z + 1: Z + +# the trailing NULL +/abc/ + XabcY\=replace=Z + 1: XZY + XabcY\=replace=[3]Z +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[3]Z +Failed: error -48: no more memory: 4 code units are needed + XabcY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 4 code units are needed + +# +# CHECKCASECPY tests +# +# The same four conditions for CHECKCASECPY as for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Also the condition where CHECKCASECPY isn't called due to a custom callout +# + +# a MARK +/(*:pear)apple/substitute_extended + XappleY\=replace=\U${*MARK} + 1: XPEARY + XappleY\=replace=[3]\U${*MARK} +Failed: error -48: no more memory + XappleY\=substitute_overflow_length,replace=[3]\U${*MARK} +Failed: error -48: no more memory: 7 code units are needed + XXappleY\=substitute_overflow_length,replace=[1]\U${*MARK} +Failed: error -48: no more memory: 8 code units are needed + XappleY\=substitute_case_callout,replace=\U${*MARK} + 1: XpeBrY + +# a subject fragment +/a(bb)c/substitute_extended + XabbcY\=replace=\U$1 + 1: XBBY + XabbcY\=replace=[2]\U$1 +Failed: error -48: no more memory + XabbcY\=substitute_overflow_length,replace=[2]\U$1 +Failed: error -48: no more memory: 5 code units are needed + XXabbcY\=substitute_overflow_length,replace=[1]\U$1 +Failed: error -48: no more memory: 6 code units are needed + XabbcY\=substitute_case_callout,replace=\U$1 + 
1: XbbY + +# a zero-length subject fragment +/a()c/substitute_extended + XacY\=replace=\U$1 + 1: XY + XacY\=replace=[2]\U$1 +Failed: error -48: no more memory + XacY\=substitute_overflow_length,replace=[2]\U$1 +Failed: error -48: no more memory: 3 code units are needed + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\U\x{48} + 1: XHY + XabcY\=replace=[1]\U\x{48} +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]\U\x{48} +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]\U\x{48} +Failed: error -48: no more memory: 5 code units are needed + XabcY\=substitute_case_callout,replace=\U\x{48} + 1: XHY + +# a replacement literal character +/abc/substitute_extended + XabcY\=replace=\UZ + 1: XZY + XabcY\=replace=[1]\UZ +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]\UZ +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]\UZ +Failed: error -48: no more memory: 5 code units are needed + XabcY\=substitute_case_callout,replace=\UZ + 1: XZY + +# +# DELAYEDFORCECASE tests +# +# Some different triggering conditions for DELAYEDFORCECASE: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# if there was a previous overflow, then the case callout can't be invoked +# Also, the CASEERROR branch. +# Also, the branch for where chars_outstanding is zero, both with and without +# a previous overflow. 
+# + +# on set casing mode +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=\Uf\Lq + 1: XSSqY + XabcY\=replace=[2]\Uf\Lq +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[2]\Uf\Lq +Failed: error -48: no more memory: 16 code units are needed + XabcY\=substitute_overflow_length,replace=[1]\Uf\Lq +Failed: error -48: no more memory: 25 code units are needed + XabcY\=replace=\U!\Lq +Failed: error -69: error performing replacement case transformation + XabcY\=replace=\U\Lq + 1: XqY + XXabcY\=substitute_overflow_length,replace=[1]\U\Lq +Failed: error -48: no more memory: 15 code units are needed + +# trailing fragment +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=f + 1: XfY + XabcY\=replace=\Uf + 1: XSSY + XabcY\=replace=[2]\Uf +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[2]\Uf +Failed: error -48: no more memory: 5 code units are needed + XabcY\=substitute_overflow_length,replace=[1]\Uf +Failed: error -48: no more memory: 14 code units are needed + XabcY\=replace=\U! 
+Failed: error -69: error performing replacement case transformation + XabcY\=replace=\U + 1: XY + XXabcY\=substitute_overflow_length,replace=[1]\U +Failed: error -48: no more memory: 4 code units are needed + +# +# do_case_copy tests +# + +/aa/i,substitute_extended + XaaY\=replace=\Uaa\uaa\LAA\lAA\U\lAA\L\uaa\u\LaaA\l\UAAa + 1: XAAAaaaaAaAAaAaaaAAY + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory + XaaY\=replace=[5]\uaa + 1: XAaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[5]\u$0 + 1: XAaY + XaaY\=replace=[1]\lAA +Failed: error -48: no more memory + XaaY\=replace=[2]\lAA +Failed: error -48: no more memory + XaaY\=replace=[3]\lAA +Failed: error -48: no more memory + XaaY\=replace=[4]\lAA +Failed: error -48: no more memory + XaaY\=replace=[5]\lAA + 1: XaAY + XAAY\=replace=[1]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[2]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[3]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[4]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[5]\l$0 + 1: XaAY + XaaY\=replace=[1]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[2]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[3]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[5]\l\UAa + 1: XaAY + XAaY\=replace=[1]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[5]\l\U$0 + 1: XaAY + 
XaaY\=replace=[1]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[2]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[3]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[4]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[5]\u\LaA + 1: XAaY + XaAY\=replace=[1]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[2]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[3]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[4]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[5]\u\L$0 + 1: XAaY + +/aa/i,substitute_extended,substitute_overflow_length + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\uaa + 1: XAaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u$0 + 1: XAaY + XaaY\=replace=[1]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\lAA + 1: XaAY + XAAY\=replace=[1]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[2]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[3]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[4]\l$0 
+Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[5]\l$0 + 1: XaAY + XaaY\=replace=[1]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\l\UAa + 1: XaAY + XAaY\=replace=[1]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[5]\l\U$0 + 1: XaAY + XaaY\=replace=[1]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u\LaA + 1: XAaY + XaAY\=replace=[1]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[2]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[3]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[4]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[5]\u\L$0 + 1: XAaY + +/aa/i,substitute_extended,substitute_case_callout + XaaY\=replace=\Uaa\uaa\LBB\lBB\U\lBB\L\uaa\u\LaaB\l\UBBa + 1: XBBBaaaaBaBBaBaaaBBY + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory + 
XaaY\=replace=[5]\uaa + 1: XBaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[5]\u$0 + 1: XBaY + XaaY\=replace=[1]\lBB +Failed: error -48: no more memory + XaaY\=replace=[2]\lBB +Failed: error -48: no more memory + XaaY\=replace=[3]\lBB +Failed: error -48: no more memory + XaaY\=replace=[4]\lBB +Failed: error -48: no more memory + XaaY\=replace=[5]\lBB + 1: XaBY + XBBY\=replace=[1]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[2]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[3]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[4]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[5]\l$0 + 0: XBBY + XaaY\=replace=[1]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[2]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[3]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[5]\l\UBa + 1: XaBY + XBaY\=replace=[1]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[5]\l\U$0 + 0: XBaY + XaaY\=replace=[1]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[2]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[3]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[4]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[5]\u\LaB + 1: XBaY + XaBY\=replace=[1]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[2]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[3]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[4]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[5]\u\L$0 + 0: XaBY + 
+/aa/i,substitute_extended,substitute_case_callout,substitute_overflow_length + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\uaa + 1: XBaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u$0 + 1: XBaY + XaaY\=replace=[1]\lBB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\lBB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\lBB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\lBB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\lBB + 1: XaBY + XBBY\=replace=[1]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[2]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[3]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[4]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[5]\l$0 + 0: XBBY + XaaY\=replace=[1]\l\UBa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\l\UBa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\l\UBa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\l\UBa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\l\UBa + 1: XaBY + XBaY\=replace=[1]\l\U$0 +Failed: error -48: no more 
memory: 5 code units are needed + XBaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XBaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XBaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XBaY\=replace=[5]\l\U$0 + 0: XBaY + XaaY\=replace=[1]\u\LaB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\u\LaB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\u\LaB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u\LaB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u\LaB + 1: XBaY + XaBY\=replace=[1]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[2]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[3]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[4]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[5]\u\L$0 + 0: XaBY + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l\U!a +Failed: error -69: error performing replacement case transformation + XaaY\=replace=\l\Ua! 
+Failed: error -69: error performing replacement case transformation + XaaY\=replace=\ufa + 1: XSSaY + XaaY\=replace=[3]\ufa +Failed: error -48: no more memory + XaaY\=replace=\l\Uaoo + 1: XaOOOOY + XaaY\=replace=[4]\l\Uaoo +Failed: error -48: no more memory + XaaY\=replace=\l\UPa + 1: XppBY + XaaY\=replace=[3]\l\UPa +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UPa +Failed: error -48: no more memory + XaaY\=replace=\l\UPo + 1: XppOOY + XaaY\=replace=[3]\l\UPo +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UPo +Failed: error -48: no more memory + XaaY\=replace=\l\UPpp + 1: XppPY + XaaY\=replace=[4]\l\UPpp +Failed: error -48: no more memory + XaaY\=replace=[5]\l\UPpp +Failed: error -48: no more memory + +# +# special test-callback case transformation tests +# + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l! +Failed: error -69: error performing replacement case transformation + XaaY\=replace=\ua\lB + 1: XBaY + XaaY\=replace=\LdDZ\UdDZ\ud\uD\uZ + 1: XdddZZZDDDY + XaaY\=replace=\uf\Uf\Lf\Us\Ls\uS\lS + 1: XSSSSfSsSsY + XaaY\=replace=\LOO\LOQ\UOO\uo\lo + 1: XoOQOOOOoY + XaaY\=replace=\upq\upp\lpp\Upp\Lpp\lP\uP + 1: XpqppppPppppPY + XaaY\=replace=\ll\ul\Ul\LMmNn\UMmNn + 1: XlMnMNmmnnMMNNY + XaaY\=replace=\Uac\Uaca\Uak\Uaka\Lck\LBK\LBKB\LBK \UK + 1: XBKBKBBKBKBckacakaac KY + Xaay\=replace=\u\Lqj\u\Lij\u\LIj\u\LiJ\u\LIJ\u\Liq\u\Lij\Uij\UiIjJ\LiIjJ + 1: XqjIJIJIJIJIqIJIJIIJJiijjy + Xaay\=replace=\Uaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: XBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBy + +# -------------- + # End of testinput2 -Error -70: PCRE2_ERROR_BADDATA (unknown error number) +Error -80: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data Error -2: partial match Error -1: no match diff --git a/testdata/testoutput20 b/testdata/testoutput20 index 5ce720f..653deb9 100644 --- a/testdata/testoutput20 +++ b/testdata/testoutput20 @@ -38,7 +38,7 @@ Named capture groups: n 1 n 2 Options: dupnames -Starting code units: b f 
+Starting code units: b f Subject length lower bound = 6 foofoo 0: foofoo diff --git a/testdata/testoutput21 b/testdata/testoutput21 index fbd7400..3ded7ed 100644 --- a/testdata/testoutput21 +++ b/testdata/testoutput21 @@ -91,4 +91,7 @@ Subject length lower bound = 5 abZdeX 0: X +/[\C]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + # End of testinput21 diff --git a/testdata/testoutput23 b/testdata/testoutput23 index c6f0aa2..2914767 100644 --- a/testdata/testoutput23 +++ b/testdata/testoutput23 @@ -5,4 +5,7 @@ /a\Cb/ Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library +/a[\C]b/ +Failed: error 107 at offset 3: escape sequence is invalid in character class + # End of testinput23 diff --git a/testdata/testoutput26 b/testdata/testoutput26 index b951360..b1f00a1 100644 --- a/testdata/testoutput26 +++ b/testdata/testoutput26 @@ -1,3511 +1,3515 @@ -# These tests are generated by maint/GenerateTest26.py, do not edit. +# These tests were generated by maint/GenerateTest.py using PCRE2's UCP +# data, do not edit unless that data has changed and they are reflecting +# a previous version. -# Unicode Script Extension tests. 
+# Unicode Script Extension tests for version 15.0.0 + +#perltest # Base script check /^\p{sc=Latin}/utf - A + A 0: A /^\p{Script=Latn}/utf - \x{1df2a} + \x{1df2a} 0: \x{1df2a} # Script extension check /^\p{Latin}/utf - \x{363} + \x{363} 0: \x{363} /^\p{scx=Latn}/utf - \x{a92e} + \x{a92e} 0: \x{a92e} # Script extension only character /^\p{Latin}/utf - \x{363} + \x{363} 0: \x{363} /^\p{sc=Latin}/utf - \x{363} + \x{363} No match # Character not in script /^\p{Latin}/utf - \x{1df2b} + \x{1df2b} No match # Base script check /^\p{sc=Greek}/utf - \x{370} + \x{370} 0: \x{370} /^\p{Script=Grek}/utf - \x{1d245} + \x{1d245} 0: \x{1d245} # Script extension check /^\p{Greek}/utf - \x{342} + \x{342} 0: \x{342} /^\p{Script_Extensions=Grek}/utf - \x{1dc1} + \x{1dc1} 0: \x{1dc1} # Script extension only character /^\p{Greek}/utf - \x{342} + \x{342} 0: \x{342} /^\p{sc=Greek}/utf - \x{342} + \x{342} No match # Character not in script /^\p{Greek}/utf - \x{1d246} + \x{1d246} No match # Base script check /^\p{sc=Cyrillic}/utf - \x{400} + \x{400} 0: \x{400} /^\p{Script=Cyrl}/utf - \x{1e08f} + \x{1e08f} 0: \x{1e08f} # Script extension check /^\p{Cyrillic}/utf - \x{483} + \x{483} 0: \x{483} /^\p{scx=Cyrl}/utf - \x{a66f} + \x{a66f} 0: \x{a66f} # Script extension only character /^\p{Cyrillic}/utf - \x{2e43} + \x{2e43} 0: \x{2e43} /^\p{sc=Cyrillic}/utf - \x{2e43} + \x{2e43} No match # Character not in script /^\p{Cyrillic}/utf - \x{1e090} + \x{1e090} No match # Base script check /^\p{sc=Arabic}/utf - \x{600} + \x{600} 0: \x{600} /^\p{Script=Arab}/utf - \x{1eef1} + \x{1eef1} 0: \x{1eef1} # Script extension check /^\p{Arabic}/utf - \x{60c} + \x{60c} 0: \x{60c} /^\p{Script_Extensions=Arab}/utf - \x{102fb} + \x{102fb} 0: \x{102fb} # Script extension only character /^\p{Arabic}/utf - \x{102e0} + \x{102e0} 0: \x{102e0} /^\p{sc=Arabic}/utf - \x{102e0} + \x{102e0} No match # Character not in script /^\p{Arabic}/utf - \x{1eef2} + \x{1eef2} No match # Base script check /^\p{sc=Syriac}/utf - \x{700} + 
\x{700} 0: \x{700} /^\p{Script=Syrc}/utf - \x{86a} + \x{86a} 0: \x{86a} # Script extension check /^\p{Syriac}/utf - \x{60c} + \x{60c} 0: \x{60c} /^\p{scx=Syrc}/utf - \x{1dfa} + \x{1dfa} 0: \x{1dfa} # Script extension only character /^\p{Syriac}/utf - \x{1dfa} + \x{1dfa} 0: \x{1dfa} /^\p{sc=Syriac}/utf - \x{1dfa} + \x{1dfa} No match # Character not in script /^\p{Syriac}/utf - \x{1dfb} + \x{1dfb} No match # Base script check /^\p{sc=Thaana}/utf - \x{780} + \x{780} 0: \x{780} /^\p{Script=Thaa}/utf - \x{7b1} + \x{7b1} 0: \x{7b1} # Script extension check /^\p{Thaana}/utf - \x{60c} + \x{60c} 0: \x{60c} /^\p{Script_Extensions=Thaa}/utf - \x{fdfd} + \x{fdfd} 0: \x{fdfd} # Script extension only character /^\p{Thaana}/utf - \x{fdf2} + \x{fdf2} 0: \x{fdf2} /^\p{sc=Thaana}/utf - \x{fdf2} + \x{fdf2} No match # Character not in script /^\p{Thaana}/utf - \x{fdfe} + \x{fdfe} No match # Base script check /^\p{sc=Devanagari}/utf - \x{900} + \x{900} 0: \x{900} /^\p{Script=Deva}/utf - \x{11b09} + \x{11b09} 0: \x{11b09} # Script extension check /^\p{Devanagari}/utf - \x{951} + \x{951} 0: \x{951} /^\p{scx=Deva}/utf - \x{a8f3} + \x{a8f3} 0: \x{a8f3} # Script extension only character /^\p{Devanagari}/utf - \x{1cd1} + \x{1cd1} 0: \x{1cd1} /^\p{sc=Devanagari}/utf - \x{1cd1} + \x{1cd1} No match # Character not in script /^\p{Devanagari}/utf - \x{11b0a} + \x{11b0a} No match # Base script check /^\p{sc=Bengali}/utf - \x{980} + \x{980} 0: \x{980} /^\p{Script=Beng}/utf - \x{9fe} + \x{9fe} 0: \x{9fe} # Script extension check /^\p{Bengali}/utf - \x{951} + \x{951} 0: \x{951} /^\p{Script_Extensions=Beng}/utf - \x{a8f1} + \x{a8f1} 0: \x{a8f1} # Script extension only character /^\p{Bengali}/utf - \x{1cf7} + \x{1cf7} 0: \x{1cf7} /^\p{sc=Bengali}/utf - \x{1cf7} + \x{1cf7} No match # Character not in script /^\p{Bengali}/utf - \x{a8f2} + \x{a8f2} No match # Base script check /^\p{sc=Gurmukhi}/utf - \x{a01} + \x{a01} 0: \x{a01} /^\p{Script=Guru}/utf - \x{a76} + \x{a76} 0: \x{a76} # Script extension check 
/^\p{Gurmukhi}/utf - \x{951} + \x{951} 0: \x{951} /^\p{scx=Guru}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Gurmukhi}/utf - \x{a836} + \x{a836} 0: \x{a836} /^\p{sc=Gurmukhi}/utf - \x{a836} + \x{a836} No match # Character not in script /^\p{Gurmukhi}/utf - \x{a83a} + \x{a83a} No match # Base script check /^\p{sc=Gujarati}/utf - \x{a81} + \x{a81} 0: \x{a81} /^\p{Script=Gujr}/utf - \x{aff} + \x{aff} 0: \x{aff} # Script extension check /^\p{Gujarati}/utf - \x{951} + \x{951} 0: \x{951} /^\p{Script_Extensions=Gujr}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Gujarati}/utf - \x{a836} + \x{a836} 0: \x{a836} /^\p{sc=Gujarati}/utf - \x{a836} + \x{a836} No match # Character not in script /^\p{Gujarati}/utf - \x{a83a} + \x{a83a} No match # Base script check /^\p{sc=Oriya}/utf - \x{b01} + \x{b01} 0: \x{b01} /^\p{Script=Orya}/utf - \x{b77} + \x{b77} 0: \x{b77} # Script extension check /^\p{Oriya}/utf - \x{951} + \x{951} 0: \x{951} /^\p{scx=Orya}/utf - \x{1cf2} + \x{1cf2} 0: \x{1cf2} # Script extension only character /^\p{Oriya}/utf - \x{1cda} + \x{1cda} 0: \x{1cda} /^\p{sc=Oriya}/utf - \x{1cda} + \x{1cda} No match # Character not in script /^\p{Oriya}/utf - \x{1cf3} + \x{1cf3} No match # Base script check /^\p{sc=Tamil}/utf - \x{b82} + \x{b82} 0: \x{b82} /^\p{Script=Taml}/utf - \x{11fff} + \x{11fff} 0: \x{11fff} # Script extension check /^\p{Tamil}/utf - \x{951} + \x{951} 0: \x{951} /^\p{Script_Extensions=Taml}/utf - \x{11fd3} + \x{11fd3} 0: \x{11fd3} # Script extension only character /^\p{Tamil}/utf - \x{a8f3} + \x{a8f3} 0: \x{a8f3} /^\p{sc=Tamil}/utf - \x{a8f3} + \x{a8f3} No match # Character not in script /^\p{Tamil}/utf - \x{12000} + \x{12000} No match # Base script check /^\p{sc=Telugu}/utf - \x{c00} + \x{c00} 0: \x{c00} /^\p{Script=Telu}/utf - \x{c7f} + \x{c7f} 0: \x{c7f} # Script extension check /^\p{Telugu}/utf - \x{951} + \x{951} 0: \x{951} /^\p{scx=Telu}/utf - \x{1cf2} + \x{1cf2} 0: \x{1cf2} # Script 
extension only character /^\p{Telugu}/utf - \x{1cda} + \x{1cda} 0: \x{1cda} /^\p{sc=Telugu}/utf - \x{1cda} + \x{1cda} No match # Character not in script /^\p{Telugu}/utf - \x{1cf3} + \x{1cf3} No match # Base script check /^\p{sc=Kannada}/utf - \x{c80} + \x{c80} 0: \x{c80} /^\p{Script=Knda}/utf - \x{cf3} + \x{cf3} 0: \x{cf3} # Script extension check /^\p{Kannada}/utf - \x{951} + \x{951} 0: \x{951} /^\p{Script_Extensions=Knda}/utf - \x{a835} + \x{a835} 0: \x{a835} # Script extension only character /^\p{Kannada}/utf - \x{1cf4} + \x{1cf4} 0: \x{1cf4} /^\p{sc=Kannada}/utf - \x{1cf4} + \x{1cf4} No match # Character not in script /^\p{Kannada}/utf - \x{a836} + \x{a836} No match # Base script check /^\p{sc=Malayalam}/utf - \x{d00} + \x{d00} 0: \x{d00} /^\p{Script=Mlym}/utf - \x{d7f} + \x{d7f} 0: \x{d7f} # Script extension check /^\p{Malayalam}/utf - \x{951} + \x{951} 0: \x{951} /^\p{scx=Mlym}/utf - \x{a832} + \x{a832} 0: \x{a832} # Script extension only character /^\p{Malayalam}/utf - \x{1cda} + \x{1cda} 0: \x{1cda} /^\p{sc=Malayalam}/utf - \x{1cda} + \x{1cda} No match # Character not in script /^\p{Malayalam}/utf - \x{a833} + \x{a833} No match # Base script check /^\p{sc=Sinhala}/utf - \x{d81} + \x{d81} 0: \x{d81} /^\p{Script=Sinh}/utf - \x{111f4} + \x{111f4} 0: \x{111f4} # Script extension check /^\p{Sinhala}/utf - \x{964} + \x{964} 0: \x{964} /^\p{Script_Extensions=Sinh}/utf - \x{965} + \x{965} 0: \x{965} # Script extension only character /^\p{Sinhala}/utf - \x{964} + \x{964} 0: \x{964} /^\p{sc=Sinhala}/utf - \x{964} + \x{964} No match # Character not in script /^\p{Sinhala}/utf - \x{111f5} + \x{111f5} No match # Base script check /^\p{sc=Myanmar}/utf - \x{1000} + \x{1000} 0: \x{1000} /^\p{Script=Mymr}/utf - \x{aa7f} + \x{aa7f} 0: \x{aa7f} # Script extension check /^\p{Myanmar}/utf - \x{1040} + \x{1040} 0: \x{1040} /^\p{scx=Mymr}/utf - \x{a92e} + \x{a92e} 0: \x{a92e} # Script extension only character /^\p{Myanmar}/utf - \x{a92e} + \x{a92e} 0: \x{a92e} 
/^\p{sc=Myanmar}/utf - \x{a92e} + \x{a92e} No match # Character not in script /^\p{Myanmar}/utf - \x{aa80} + \x{aa80} No match # Base script check /^\p{sc=Georgian}/utf - \x{10a0} + \x{10a0} 0: \x{10a0} /^\p{Script=Geor}/utf - \x{2d2d} + \x{2d2d} 0: \x{2d2d} # Script extension check /^\p{Georgian}/utf - \x{10fb} + \x{10fb} 0: \x{10fb} /^\p{Script_Extensions=Geor}/utf - \x{10fb} + \x{10fb} 0: \x{10fb} # Script extension only character /^\p{Georgian}/utf - \x{10fb} + \x{10fb} 0: \x{10fb} /^\p{sc=Georgian}/utf - \x{10fb} + \x{10fb} No match # Character not in script /^\p{Georgian}/utf - \x{2d2e} + \x{2d2e} No match # Base script check /^\p{sc=Hangul}/utf - \x{1100} + \x{1100} 0: \x{1100} /^\p{Script=Hang}/utf - \x{ffdc} + \x{ffdc} 0: \x{ffdc} # Script extension check /^\p{Hangul}/utf - \x{3001} + \x{3001} 0: \x{3001} /^\p{scx=Hang}/utf - \x{ff65} + \x{ff65} 0: \x{ff65} # Script extension only character /^\p{Hangul}/utf - \x{3003} + \x{3003} 0: \x{3003} /^\p{sc=Hangul}/utf - \x{3003} + \x{3003} No match # Character not in script /^\p{Hangul}/utf - \x{ffdd} + \x{ffdd} No match # Base script check /^\p{sc=Mongolian}/utf - \x{1800} + \x{1800} 0: \x{1800} /^\p{Script=Mong}/utf - \x{1166c} + \x{1166c} 0: \x{1166c} # Script extension check /^\p{Mongolian}/utf - \x{1802} + \x{1802} 0: \x{1802} /^\p{Script_Extensions=Mong}/utf - \x{202f} + \x{202f} 0: \x{202f} # Script extension only character /^\p{Mongolian}/utf - \x{202f} + \x{202f} 0: \x{202f} /^\p{sc=Mongolian}/utf - \x{202f} + \x{202f} No match # Character not in script /^\p{Mongolian}/utf - \x{1166d} + \x{1166d} No match # Base script check /^\p{sc=Hiragana}/utf - \x{3041} + \x{3041} 0: \x{3041} /^\p{Script=Hira}/utf - \x{1f200} + \x{1f200} 0: \x{1f200} # Script extension check /^\p{Hiragana}/utf - \x{3001} + \x{3001} 0: \x{3001} /^\p{scx=Hira}/utf - \x{ff9f} + \x{ff9f} 0: \x{ff9f} # Script extension only character /^\p{Hiragana}/utf - \x{3031} + \x{3031} 0: \x{3031} /^\p{sc=Hiragana}/utf - \x{3031} + \x{3031} No match # 
Character not in script /^\p{Hiragana}/utf - \x{1f201} + \x{1f201} No match # Base script check /^\p{sc=Katakana}/utf - \x{30a1} + \x{30a1} 0: \x{30a1} /^\p{Script=Kana}/utf - \x{1b167} + \x{1b167} 0: \x{1b167} # Script extension check /^\p{Katakana}/utf - \x{3001} + \x{3001} 0: \x{3001} /^\p{Script_Extensions=Kana}/utf - \x{ff9f} + \x{ff9f} 0: \x{ff9f} # Script extension only character /^\p{Katakana}/utf - \x{3031} + \x{3031} 0: \x{3031} /^\p{sc=Katakana}/utf - \x{3031} + \x{3031} No match # Character not in script /^\p{Katakana}/utf - \x{1b168} + \x{1b168} No match # Base script check /^\p{sc=Bopomofo}/utf - \x{2ea} + \x{2ea} 0: \x{2ea} /^\p{Script=Bopo}/utf - \x{31bf} + \x{31bf} 0: \x{31bf} # Script extension check /^\p{Bopomofo}/utf - \x{3001} + \x{3001} 0: \x{3001} /^\p{scx=Bopo}/utf - \x{ff65} + \x{ff65} 0: \x{ff65} # Script extension only character /^\p{Bopomofo}/utf - \x{302a} + \x{302a} 0: \x{302a} /^\p{sc=Bopomofo}/utf - \x{302a} + \x{302a} No match # Character not in script /^\p{Bopomofo}/utf - \x{ff66} + \x{ff66} No match # Base script check /^\p{sc=Han}/utf - \x{2e80} + \x{2e80} 0: \x{2e80} /^\p{Script=Hani}/utf - \x{323af} + \x{323af} 0: \x{323af} # Script extension check /^\p{Han}/utf - \x{3001} + \x{3001} 0: \x{3001} /^\p{Script_Extensions=Hani}/utf - \x{1f251} + \x{1f251} 0: \x{1f251} # Script extension only character /^\p{Han}/utf - \x{3006} + \x{3006} 0: \x{3006} /^\p{sc=Han}/utf - \x{3006} + \x{3006} No match # Character not in script /^\p{Han}/utf - \x{323b0} + \x{323b0} No match # Base script check /^\p{sc=Yi}/utf - \x{a000} + \x{a000} 0: \x{a000} /^\p{Script=Yiii}/utf - \x{a4c6} + \x{a4c6} 0: \x{a4c6} # Script extension check /^\p{Yi}/utf - \x{3001} + \x{3001} 0: \x{3001} /^\p{scx=Yiii}/utf - \x{ff65} + \x{ff65} 0: \x{ff65} # Script extension only character /^\p{Yi}/utf - \x{3001} + \x{3001} 0: \x{3001} /^\p{sc=Yi}/utf - \x{3001} + \x{3001} No match # Character not in script /^\p{Yi}/utf - \x{ff66} + \x{ff66} No match # Base script check 
/^\p{sc=Tagalog}/utf - \x{1700} + \x{1700} 0: \x{1700} /^\p{Script=Tglg}/utf - \x{171f} + \x{171f} 0: \x{171f} # Script extension check /^\p{Tagalog}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{Script_Extensions=Tglg}/utf - \x{1736} + \x{1736} 0: \x{1736} # Script extension only character /^\p{Tagalog}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{sc=Tagalog}/utf - \x{1735} + \x{1735} No match # Character not in script /^\p{Tagalog}/utf - \x{1737} + \x{1737} No match # Base script check /^\p{sc=Hanunoo}/utf - \x{1720} + \x{1720} 0: \x{1720} /^\p{Script=Hano}/utf - \x{1734} + \x{1734} 0: \x{1734} # Script extension check /^\p{Hanunoo}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{scx=Hano}/utf - \x{1736} + \x{1736} 0: \x{1736} # Script extension only character /^\p{Hanunoo}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{sc=Hanunoo}/utf - \x{1735} + \x{1735} No match # Character not in script /^\p{Hanunoo}/utf - \x{1737} + \x{1737} No match # Base script check /^\p{sc=Buhid}/utf - \x{1740} + \x{1740} 0: \x{1740} /^\p{Script=Buhd}/utf - \x{1753} + \x{1753} 0: \x{1753} # Script extension check /^\p{Buhid}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{Script_Extensions=Buhd}/utf - \x{1736} + \x{1736} 0: \x{1736} # Script extension only character /^\p{Buhid}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{sc=Buhid}/utf - \x{1735} + \x{1735} No match # Character not in script /^\p{Buhid}/utf - \x{1754} + \x{1754} No match # Base script check /^\p{sc=Tagbanwa}/utf - \x{1760} + \x{1760} 0: \x{1760} /^\p{Script=Tagb}/utf - \x{1773} + \x{1773} 0: \x{1773} # Script extension check /^\p{Tagbanwa}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{scx=Tagb}/utf - \x{1736} + \x{1736} 0: \x{1736} # Script extension only character /^\p{Tagbanwa}/utf - \x{1735} + \x{1735} 0: \x{1735} /^\p{sc=Tagbanwa}/utf - \x{1735} + \x{1735} No match # Character not in script /^\p{Tagbanwa}/utf - \x{1774} + \x{1774} No match # Base script check /^\p{sc=Limbu}/utf - \x{1900} + \x{1900} 0: \x{1900} /^\p{Script=Limb}/utf - 
\x{194f} + \x{194f} 0: \x{194f} # Script extension check /^\p{Limbu}/utf - \x{965} + \x{965} 0: \x{965} /^\p{Script_Extensions=Limb}/utf - \x{965} + \x{965} 0: \x{965} # Script extension only character /^\p{Limbu}/utf - \x{965} + \x{965} 0: \x{965} /^\p{sc=Limbu}/utf - \x{965} + \x{965} No match # Character not in script /^\p{Limbu}/utf - \x{1950} + \x{1950} No match # Base script check /^\p{sc=Tai_Le}/utf - \x{1950} + \x{1950} 0: \x{1950} /^\p{Script=Tale}/utf - \x{1974} + \x{1974} 0: \x{1974} # Script extension check /^\p{Tai_Le}/utf - \x{1040} + \x{1040} 0: \x{1040} /^\p{scx=Tale}/utf - \x{1049} + \x{1049} 0: \x{1049} # Script extension only character /^\p{Tai_Le}/utf - \x{1040} + \x{1040} 0: \x{1040} /^\p{sc=Tai_Le}/utf - \x{1040} + \x{1040} No match # Character not in script /^\p{Tai_Le}/utf - \x{1975} + \x{1975} No match # Base script check /^\p{sc=Linear_B}/utf - \x{10000} + \x{10000} 0: \x{10000} /^\p{Script=Linb}/utf - \x{100fa} + \x{100fa} 0: \x{100fa} # Script extension check /^\p{Linear_B}/utf - \x{10100} + \x{10100} 0: \x{10100} /^\p{Script_Extensions=Linb}/utf - \x{1013f} + \x{1013f} 0: \x{1013f} # Script extension only character /^\p{Linear_B}/utf - \x{10102} + \x{10102} 0: \x{10102} /^\p{sc=Linear_B}/utf - \x{10102} + \x{10102} No match # Character not in script /^\p{Linear_B}/utf - \x{10140} + \x{10140} No match # Base script check /^\p{sc=Cypriot}/utf - \x{10800} + \x{10800} 0: \x{10800} /^\p{Script=Cprt}/utf - \x{1083f} + \x{1083f} 0: \x{1083f} # Script extension check /^\p{Cypriot}/utf - \x{10100} + \x{10100} 0: \x{10100} /^\p{scx=Cprt}/utf - \x{1013f} + \x{1013f} 0: \x{1013f} # Script extension only character /^\p{Cypriot}/utf - \x{10102} + \x{10102} 0: \x{10102} /^\p{sc=Cypriot}/utf - \x{10102} + \x{10102} No match # Character not in script /^\p{Cypriot}/utf - \x{10840} + \x{10840} No match # Base script check /^\p{sc=Buginese}/utf - \x{1a00} + \x{1a00} 0: \x{1a00} /^\p{Script=Bugi}/utf - \x{1a1f} + \x{1a1f} 0: \x{1a1f} # Script extension 
check /^\p{Buginese}/utf - \x{a9cf} + \x{a9cf} 0: \x{a9cf} /^\p{Script_Extensions=Bugi}/utf - \x{a9cf} + \x{a9cf} 0: \x{a9cf} # Script extension only character /^\p{Buginese}/utf - \x{a9cf} + \x{a9cf} 0: \x{a9cf} /^\p{sc=Buginese}/utf - \x{a9cf} + \x{a9cf} No match # Character not in script /^\p{Buginese}/utf - \x{a9d0} + \x{a9d0} No match # Base script check /^\p{sc=Coptic}/utf - \x{3e2} + \x{3e2} 0: \x{3e2} /^\p{Script=Copt}/utf - \x{2cff} + \x{2cff} 0: \x{2cff} # Script extension check /^\p{Coptic}/utf - \x{102e0} + \x{102e0} 0: \x{102e0} /^\p{scx=Copt}/utf - \x{102fb} + \x{102fb} 0: \x{102fb} # Script extension only character /^\p{Coptic}/utf - \x{102e0} + \x{102e0} 0: \x{102e0} /^\p{sc=Coptic}/utf - \x{102e0} + \x{102e0} No match # Character not in script /^\p{Coptic}/utf - \x{102fc} + \x{102fc} No match # Base script check /^\p{sc=Glagolitic}/utf - \x{2c00} + \x{2c00} 0: \x{2c00} /^\p{Script=Glag}/utf - \x{1e02a} + \x{1e02a} 0: \x{1e02a} # Script extension check /^\p{Glagolitic}/utf - \x{484} + \x{484} 0: \x{484} /^\p{Script_Extensions=Glag}/utf - \x{a66f} + \x{a66f} 0: \x{a66f} # Script extension only character /^\p{Glagolitic}/utf - \x{484} + \x{484} 0: \x{484} /^\p{sc=Glagolitic}/utf - \x{484} + \x{484} No match # Character not in script /^\p{Glagolitic}/utf - \x{1e02b} + \x{1e02b} No match # Base script check /^\p{sc=Syloti_Nagri}/utf - \x{a800} + \x{a800} 0: \x{a800} /^\p{Script=Sylo}/utf - \x{a82c} + \x{a82c} 0: \x{a82c} # Script extension check /^\p{Syloti_Nagri}/utf - \x{964} + \x{964} 0: \x{964} /^\p{scx=Sylo}/utf - \x{9ef} + \x{9ef} 0: \x{9ef} # Script extension only character /^\p{Syloti_Nagri}/utf - \x{9e6} + \x{9e6} 0: \x{9e6} /^\p{sc=Syloti_Nagri}/utf - \x{9e6} + \x{9e6} No match # Character not in script /^\p{Syloti_Nagri}/utf - \x{a82d} + \x{a82d} No match # Base script check /^\p{sc=Phags_Pa}/utf - \x{a840} + \x{a840} 0: \x{a840} /^\p{Script=Phag}/utf - \x{a877} + \x{a877} 0: \x{a877} # Script extension check /^\p{Phags_Pa}/utf - \x{1802} + 
\x{1802} 0: \x{1802} /^\p{Script_Extensions=Phag}/utf - \x{1805} + \x{1805} 0: \x{1805} # Script extension only character /^\p{Phags_Pa}/utf - \x{1802} + \x{1802} 0: \x{1802} /^\p{sc=Phags_Pa}/utf - \x{1802} + \x{1802} No match # Character not in script /^\p{Phags_Pa}/utf - \x{a878} + \x{a878} No match # Base script check /^\p{sc=Nko}/utf - \x{7c0} + \x{7c0} 0: \x{7c0} /^\p{Script=Nkoo}/utf - \x{7ff} + \x{7ff} 0: \x{7ff} # Script extension check /^\p{Nko}/utf - \x{60c} + \x{60c} 0: \x{60c} /^\p{scx=Nkoo}/utf - \x{fd3f} + \x{fd3f} 0: \x{fd3f} # Script extension only character /^\p{Nko}/utf - \x{fd3e} + \x{fd3e} 0: \x{fd3e} /^\p{sc=Nko}/utf - \x{fd3e} + \x{fd3e} No match # Character not in script /^\p{Nko}/utf - \x{fd40} + \x{fd40} No match # Base script check /^\p{sc=Kayah_Li}/utf - \x{a900} + \x{a900} 0: \x{a900} /^\p{Script=Kali}/utf - \x{a92f} + \x{a92f} 0: \x{a92f} # Script extension check /^\p{Kayah_Li}/utf - \x{a92e} + \x{a92e} 0: \x{a92e} /^\p{Script_Extensions=Kali}/utf - \x{a92e} + \x{a92e} 0: \x{a92e} # Script extension only character /^\p{Kayah_Li}/utf - \x{a92e} + \x{a92e} 0: \x{a92e} /^\p{sc=Kayah_Li}/utf - \x{a92e} + \x{a92e} No match # Character not in script /^\p{Kayah_Li}/utf - \x{a930} + \x{a930} No match # Base script check /^\p{sc=Javanese}/utf - \x{a980} + \x{a980} 0: \x{a980} /^\p{Script=Java}/utf - \x{a9df} + \x{a9df} 0: \x{a9df} # Script extension check /^\p{Javanese}/utf - \x{a9cf} + \x{a9cf} 0: \x{a9cf} /^\p{scx=Java}/utf - \x{a9cf} + \x{a9cf} 0: \x{a9cf} # Script extension only character /^\p{Javanese}/utf - \x{a9cf} + \x{a9cf} 0: \x{a9cf} /^\p{sc=Javanese}/utf - \x{a9cf} + \x{a9cf} No match # Character not in script /^\p{Javanese}/utf - \x{a9e0} + \x{a9e0} No match # Base script check /^\p{sc=Kaithi}/utf - \x{11080} + \x{11080} 0: \x{11080} /^\p{Script=Kthi}/utf - \x{110cd} + \x{110cd} 0: \x{110cd} # Script extension check /^\p{Kaithi}/utf - \x{966} + \x{966} 0: \x{966} /^\p{Script_Extensions=Kthi}/utf - \x{a839} + \x{a839} 0: \x{a839} # 
Script extension only character /^\p{Kaithi}/utf - \x{966} + \x{966} 0: \x{966} /^\p{sc=Kaithi}/utf - \x{966} + \x{966} No match # Character not in script /^\p{Kaithi}/utf - \x{110ce} + \x{110ce} No match # Base script check /^\p{sc=Mandaic}/utf - \x{840} + \x{840} 0: \x{840} /^\p{Script=Mand}/utf - \x{85e} + \x{85e} 0: \x{85e} # Script extension check /^\p{Mandaic}/utf - \x{640} + \x{640} 0: \x{640} /^\p{scx=Mand}/utf - \x{640} + \x{640} 0: \x{640} # Script extension only character /^\p{Mandaic}/utf - \x{640} + \x{640} 0: \x{640} /^\p{sc=Mandaic}/utf - \x{640} + \x{640} No match # Character not in script /^\p{Mandaic}/utf - \x{85f} + \x{85f} No match # Base script check /^\p{sc=Chakma}/utf - \x{11100} + \x{11100} 0: \x{11100} /^\p{Script=Cakm}/utf - \x{11147} + \x{11147} 0: \x{11147} # Script extension check /^\p{Chakma}/utf - \x{9e6} + \x{9e6} 0: \x{9e6} /^\p{Script_Extensions=Cakm}/utf - \x{1049} + \x{1049} 0: \x{1049} # Script extension only character /^\p{Chakma}/utf - \x{9e6} + \x{9e6} 0: \x{9e6} /^\p{sc=Chakma}/utf - \x{9e6} + \x{9e6} No match # Character not in script /^\p{Chakma}/utf - \x{11148} + \x{11148} No match # Base script check /^\p{sc=Sharada}/utf - \x{11180} + \x{11180} 0: \x{11180} /^\p{Script=Shrd}/utf - \x{111df} + \x{111df} 0: \x{111df} # Script extension check /^\p{Sharada}/utf - \x{951} + \x{951} 0: \x{951} /^\p{scx=Shrd}/utf - \x{1ce0} + \x{1ce0} 0: \x{1ce0} # Script extension only character /^\p{Sharada}/utf - \x{1cd7} + \x{1cd7} 0: \x{1cd7} /^\p{sc=Sharada}/utf - \x{1cd7} + \x{1cd7} No match # Character not in script /^\p{Sharada}/utf - \x{111e0} + \x{111e0} No match # Base script check /^\p{sc=Takri}/utf - \x{11680} + \x{11680} 0: \x{11680} /^\p{Script=Takr}/utf - \x{116c9} + \x{116c9} 0: \x{116c9} # Script extension check /^\p{Takri}/utf - \x{964} + \x{964} 0: \x{964} /^\p{Script_Extensions=Takr}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Takri}/utf - \x{a836} + \x{a836} 0: \x{a836} /^\p{sc=Takri}/utf 
- \x{a836} + \x{a836} No match # Character not in script /^\p{Takri}/utf - \x{116ca} + \x{116ca} No match # Base script check /^\p{sc=Duployan}/utf - \x{1bc00} + \x{1bc00} 0: \x{1bc00} /^\p{Script=Dupl}/utf - \x{1bc9f} + \x{1bc9f} 0: \x{1bc9f} # Script extension check /^\p{Duployan}/utf - \x{1bca0} + \x{1bca0} 0: \x{1bca0} /^\p{scx=Dupl}/utf - \x{1bca3} + \x{1bca3} 0: \x{1bca3} # Script extension only character /^\p{Duployan}/utf - \x{1bca0} + \x{1bca0} 0: \x{1bca0} /^\p{sc=Duployan}/utf - \x{1bca0} + \x{1bca0} No match # Character not in script /^\p{Duployan}/utf - \x{1bca4} + \x{1bca4} No match # Base script check /^\p{sc=Grantha}/utf - \x{11300} + \x{11300} 0: \x{11300} /^\p{Script=Gran}/utf - \x{11374} + \x{11374} 0: \x{11374} # Script extension check /^\p{Grantha}/utf - \x{951} + \x{951} 0: \x{951} /^\p{Script_Extensions=Gran}/utf - \x{11fd3} + \x{11fd3} 0: \x{11fd3} # Script extension only character /^\p{Grantha}/utf - \x{1cd3} + \x{1cd3} 0: \x{1cd3} /^\p{sc=Grantha}/utf - \x{1cd3} + \x{1cd3} No match # Character not in script /^\p{Grantha}/utf - \x{11fd4} + \x{11fd4} No match # Base script check /^\p{sc=Khojki}/utf - \x{11200} + \x{11200} 0: \x{11200} /^\p{Script=Khoj}/utf - \x{11241} + \x{11241} 0: \x{11241} # Script extension check /^\p{Khojki}/utf - \x{ae6} + \x{ae6} 0: \x{ae6} /^\p{scx=Khoj}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Khojki}/utf - \x{ae6} + \x{ae6} 0: \x{ae6} /^\p{sc=Khojki}/utf - \x{ae6} + \x{ae6} No match # Character not in script /^\p{Khojki}/utf - \x{11242} + \x{11242} No match # Base script check /^\p{sc=Linear_A}/utf - \x{10600} + \x{10600} 0: \x{10600} /^\p{Script=Lina}/utf - \x{10767} + \x{10767} 0: \x{10767} # Script extension check /^\p{Linear_A}/utf - \x{10107} + \x{10107} 0: \x{10107} /^\p{Script_Extensions=Lina}/utf - \x{10133} + \x{10133} 0: \x{10133} # Script extension only character /^\p{Linear_A}/utf - \x{10107} + \x{10107} 0: \x{10107} /^\p{sc=Linear_A}/utf - \x{10107} + \x{10107} No 
match # Character not in script /^\p{Linear_A}/utf - \x{10768} + \x{10768} No match # Base script check /^\p{sc=Mahajani}/utf - \x{11150} + \x{11150} 0: \x{11150} /^\p{Script=Mahj}/utf - \x{11176} + \x{11176} 0: \x{11176} # Script extension check /^\p{Mahajani}/utf - \x{964} + \x{964} 0: \x{964} /^\p{scx=Mahj}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Mahajani}/utf - \x{966} + \x{966} 0: \x{966} /^\p{sc=Mahajani}/utf - \x{966} + \x{966} No match # Character not in script /^\p{Mahajani}/utf - \x{11177} + \x{11177} No match # Base script check /^\p{sc=Manichaean}/utf - \x{10ac0} + \x{10ac0} 0: \x{10ac0} /^\p{Script=Mani}/utf - \x{10af6} + \x{10af6} 0: \x{10af6} # Script extension check /^\p{Manichaean}/utf - \x{640} + \x{640} 0: \x{640} /^\p{Script_Extensions=Mani}/utf - \x{10af2} + \x{10af2} 0: \x{10af2} # Script extension only character /^\p{Manichaean}/utf - \x{640} + \x{640} 0: \x{640} /^\p{sc=Manichaean}/utf - \x{640} + \x{640} No match # Character not in script /^\p{Manichaean}/utf - \x{10af7} + \x{10af7} No match # Base script check /^\p{sc=Modi}/utf - \x{11600} + \x{11600} 0: \x{11600} /^\p{Script=Modi}/utf - \x{11659} + \x{11659} 0: \x{11659} # Script extension check /^\p{Modi}/utf - \x{a830} + \x{a830} 0: \x{a830} /^\p{scx=Modi}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Modi}/utf - \x{a836} + \x{a836} 0: \x{a836} /^\p{sc=Modi}/utf - \x{a836} + \x{a836} No match # Character not in script /^\p{Modi}/utf - \x{1165a} + \x{1165a} No match # Base script check /^\p{sc=Old_Permic}/utf - \x{10350} + \x{10350} 0: \x{10350} /^\p{Script=Perm}/utf - \x{1037a} + \x{1037a} 0: \x{1037a} # Script extension check /^\p{Old_Permic}/utf - \x{483} + \x{483} 0: \x{483} /^\p{Script_Extensions=Perm}/utf - \x{483} + \x{483} 0: \x{483} # Script extension only character /^\p{Old_Permic}/utf - \x{483} + \x{483} 0: \x{483} /^\p{sc=Old_Permic}/utf - \x{483} + \x{483} No match # Character not in script 
/^\p{Old_Permic}/utf - \x{1037b} + \x{1037b} No match # Base script check /^\p{sc=Psalter_Pahlavi}/utf - \x{10b80} + \x{10b80} 0: \x{10b80} /^\p{Script=Phlp}/utf - \x{10baf} + \x{10baf} 0: \x{10baf} # Script extension check /^\p{Psalter_Pahlavi}/utf - \x{640} + \x{640} 0: \x{640} /^\p{scx=Phlp}/utf - \x{640} + \x{640} 0: \x{640} # Script extension only character /^\p{Psalter_Pahlavi}/utf - \x{640} + \x{640} 0: \x{640} /^\p{sc=Psalter_Pahlavi}/utf - \x{640} + \x{640} No match # Character not in script /^\p{Psalter_Pahlavi}/utf - \x{10bb0} + \x{10bb0} No match # Base script check /^\p{sc=Khudawadi}/utf - \x{112b0} + \x{112b0} 0: \x{112b0} /^\p{Script=Sind}/utf - \x{112f9} + \x{112f9} 0: \x{112f9} # Script extension check /^\p{Khudawadi}/utf - \x{964} + \x{964} 0: \x{964} /^\p{Script_Extensions=Sind}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Khudawadi}/utf - \x{a836} + \x{a836} 0: \x{a836} /^\p{sc=Khudawadi}/utf - \x{a836} + \x{a836} No match # Character not in script /^\p{Khudawadi}/utf - \x{112fa} + \x{112fa} No match # Base script check /^\p{sc=Tirhuta}/utf - \x{11480} + \x{11480} 0: \x{11480} /^\p{Script=Tirh}/utf - \x{114d9} + \x{114d9} 0: \x{114d9} # Script extension check /^\p{Tirhuta}/utf - \x{951} + \x{951} 0: \x{951} /^\p{scx=Tirh}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Tirhuta}/utf - \x{1cf2} + \x{1cf2} 0: \x{1cf2} /^\p{sc=Tirhuta}/utf - \x{1cf2} + \x{1cf2} No match # Character not in script /^\p{Tirhuta}/utf - \x{114da} + \x{114da} No match # Base script check /^\p{sc=Multani}/utf - \x{11280} + \x{11280} 0: \x{11280} /^\p{Script=Mult}/utf - \x{112a9} + \x{112a9} 0: \x{112a9} # Script extension check /^\p{Multani}/utf - \x{a66} + \x{a66} 0: \x{a66} /^\p{Script_Extensions=Mult}/utf - \x{a6f} + \x{a6f} 0: \x{a6f} # Script extension only character /^\p{Multani}/utf - \x{a66} + \x{a66} 0: \x{a66} /^\p{sc=Multani}/utf - \x{a66} + \x{a66} No match # Character not in script /^\p{Multani}/utf 
- \x{112aa} + \x{112aa} No match # Base script check /^\p{sc=Adlam}/utf - \x{1e900} + \x{1e900} 0: \x{1e900} /^\p{Script=Adlm}/utf - \x{1e95f} + \x{1e95f} 0: \x{1e95f} # Script extension check /^\p{Adlam}/utf - \x{61f} + \x{61f} 0: \x{61f} /^\p{scx=Adlm}/utf - \x{640} + \x{640} 0: \x{640} # Script extension only character /^\p{Adlam}/utf - \x{61f} + \x{61f} 0: \x{61f} /^\p{sc=Adlam}/utf - \x{61f} + \x{61f} No match # Character not in script /^\p{Adlam}/utf - \x{1e960} + \x{1e960} No match # Base script check /^\p{sc=Masaram_Gondi}/utf - \x{11d00} + \x{11d00} 0: \x{11d00} /^\p{Script=Gonm}/utf - \x{11d59} + \x{11d59} 0: \x{11d59} # Script extension check /^\p{Masaram_Gondi}/utf - \x{964} + \x{964} 0: \x{964} /^\p{Script_Extensions=Gonm}/utf - \x{965} + \x{965} 0: \x{965} # Script extension only character /^\p{Masaram_Gondi}/utf - \x{964} + \x{964} 0: \x{964} /^\p{sc=Masaram_Gondi}/utf - \x{964} + \x{964} No match # Character not in script /^\p{Masaram_Gondi}/utf - \x{11d5a} + \x{11d5a} No match # Base script check /^\p{sc=Dogra}/utf - \x{11800} + \x{11800} 0: \x{11800} /^\p{Script=Dogr}/utf - \x{1183b} + \x{1183b} 0: \x{1183b} # Script extension check /^\p{Dogra}/utf - \x{964} + \x{964} 0: \x{964} /^\p{scx=Dogr}/utf - \x{a839} + \x{a839} 0: \x{a839} # Script extension only character /^\p{Dogra}/utf - \x{966} + \x{966} 0: \x{966} /^\p{sc=Dogra}/utf - \x{966} + \x{966} No match # Character not in script /^\p{Dogra}/utf - \x{1183c} + \x{1183c} No match # Base script check /^\p{sc=Gunjala_Gondi}/utf - \x{11d60} + \x{11d60} 0: \x{11d60} /^\p{Script=Gong}/utf - \x{11da9} + \x{11da9} 0: \x{11da9} # Script extension check /^\p{Gunjala_Gondi}/utf - \x{964} + \x{964} 0: \x{964} /^\p{Script_Extensions=Gong}/utf - \x{965} + \x{965} 0: \x{965} # Script extension only character /^\p{Gunjala_Gondi}/utf - \x{964} + \x{964} 0: \x{964} /^\p{sc=Gunjala_Gondi}/utf - \x{964} + \x{964} No match # Character not in script /^\p{Gunjala_Gondi}/utf - \x{11daa} + \x{11daa} No match # Base 
script check /^\p{sc=Hanifi_Rohingya}/utf - \x{10d00} + \x{10d00} 0: \x{10d00} /^\p{Script=Rohg}/utf - \x{10d39} + \x{10d39} 0: \x{10d39} # Script extension check /^\p{Hanifi_Rohingya}/utf - \x{60c} + \x{60c} 0: \x{60c} /^\p{scx=Rohg}/utf - \x{6d4} + \x{6d4} 0: \x{6d4} # Script extension only character /^\p{Hanifi_Rohingya}/utf - \x{6d4} + \x{6d4} 0: \x{6d4} /^\p{sc=Hanifi_Rohingya}/utf - \x{6d4} + \x{6d4} No match # Character not in script /^\p{Hanifi_Rohingya}/utf - \x{10d3a} + \x{10d3a} No match # Base script check /^\p{sc=Sogdian}/utf - \x{10f30} + \x{10f30} 0: \x{10f30} /^\p{Script=Sogd}/utf - \x{10f59} + \x{10f59} 0: \x{10f59} # Script extension check /^\p{Sogdian}/utf - \x{640} + \x{640} 0: \x{640} /^\p{Script_Extensions=Sogd}/utf - \x{640} + \x{640} 0: \x{640} # Script extension only character /^\p{Sogdian}/utf - \x{640} + \x{640} 0: \x{640} /^\p{sc=Sogdian}/utf - \x{640} + \x{640} No match # Character not in script /^\p{Sogdian}/utf - \x{10f5a} + \x{10f5a} No match # Base script check /^\p{sc=Nandinagari}/utf - \x{119a0} + \x{119a0} 0: \x{119a0} /^\p{Script=Nand}/utf - \x{119e4} + \x{119e4} 0: \x{119e4} # Script extension check /^\p{Nandinagari}/utf - \x{964} + \x{964} 0: \x{964} /^\p{scx=Nand}/utf - \x{a835} + \x{a835} 0: \x{a835} # Script extension only character /^\p{Nandinagari}/utf - \x{1cfa} + \x{1cfa} 0: \x{1cfa} /^\p{sc=Nandinagari}/utf - \x{1cfa} + \x{1cfa} No match # Character not in script /^\p{Nandinagari}/utf - \x{119e5} + \x{119e5} No match # Base script check /^\p{sc=Yezidi}/utf - \x{10e80} + \x{10e80} 0: \x{10e80} /^\p{Script=Yezi}/utf - \x{10eb1} + \x{10eb1} 0: \x{10eb1} # Script extension check /^\p{Yezidi}/utf - \x{60c} + \x{60c} 0: \x{60c} /^\p{Script_Extensions=Yezi}/utf - \x{669} + \x{669} 0: \x{669} # Script extension only character /^\p{Yezidi}/utf - \x{660} + \x{660} 0: \x{660} /^\p{sc=Yezidi}/utf - \x{660} + \x{660} No match # Character not in script /^\p{Yezidi}/utf - \x{10eb2} + \x{10eb2} No match # Base script check 
/^\p{sc=Cypro_Minoan}/utf - \x{12f90} + \x{12f90} 0: \x{12f90} /^\p{Script=Cpmn}/utf - \x{12ff2} + \x{12ff2} 0: \x{12ff2} # Script extension check /^\p{Cypro_Minoan}/utf - \x{10100} + \x{10100} 0: \x{10100} /^\p{scx=Cpmn}/utf - \x{10101} + \x{10101} 0: \x{10101} # Script extension only character /^\p{Cypro_Minoan}/utf - \x{10100} + \x{10100} 0: \x{10100} /^\p{sc=Cypro_Minoan}/utf - \x{10100} + \x{10100} No match # Character not in script /^\p{Cypro_Minoan}/utf - \x{12ff3} + \x{12ff3} No match # Base script check /^\p{sc=Old_Uyghur}/utf - \x{10f70} + \x{10f70} 0: \x{10f70} /^\p{Script=Ougr}/utf - \x{10f89} + \x{10f89} 0: \x{10f89} # Script extension check /^\p{Old_Uyghur}/utf - \x{640} + \x{640} 0: \x{640} /^\p{Script_Extensions=Ougr}/utf - \x{10af2} + \x{10af2} 0: \x{10af2} # Script extension only character /^\p{Old_Uyghur}/utf - \x{10af2} + \x{10af2} 0: \x{10af2} /^\p{sc=Old_Uyghur}/utf - \x{10af2} + \x{10af2} No match # Character not in script /^\p{Old_Uyghur}/utf - \x{10f8a} + \x{10f8a} No match # Base script check /^\p{sc=Common}/utf - \x{00} + \x{00} 0: \x{00} /^\p{Script=Zyyy}/utf - \x{e007f} + \x{e007f} 0: \x{e007f} # Character not in script /^\p{Common}/utf - \x{e0080} + \x{e0080} No match # Base script check /^\p{sc=Armenian}/utf - \x{531} + \x{531} 0: \x{531} /^\p{Script=Armn}/utf - \x{fb17} + \x{fb17} 0: \x{fb17} # Character not in script /^\p{Armenian}/utf - \x{fb18} + \x{fb18} No match # Base script check /^\p{sc=Hebrew}/utf - \x{591} + \x{591} 0: \x{591} /^\p{Script=Hebr}/utf - \x{fb4f} + \x{fb4f} 0: \x{fb4f} # Character not in script /^\p{Hebrew}/utf - \x{fb50} + \x{fb50} No match # Base script check /^\p{sc=Thai}/utf - \x{e01} + \x{e01} 0: \x{e01} /^\p{Script=Thai}/utf - \x{e5b} + \x{e5b} 0: \x{e5b} # Character not in script /^\p{Thai}/utf - \x{e5c} + \x{e5c} No match # Base script check /^\p{sc=Lao}/utf - \x{e81} + \x{e81} 0: \x{e81} /^\p{Script=Laoo}/utf - \x{edf} + \x{edf} 0: \x{edf} # Character not in script /^\p{Lao}/utf - \x{ee0} + \x{ee0} No 
match # Base script check /^\p{sc=Tibetan}/utf - \x{f00} + \x{f00} 0: \x{f00} /^\p{Script=Tibt}/utf - \x{fda} + \x{fda} 0: \x{fda} # Character not in script /^\p{Tibetan}/utf - \x{fdb} + \x{fdb} No match # Base script check /^\p{sc=Ethiopic}/utf - \x{1200} + \x{1200} 0: \x{1200} /^\p{Script=Ethi}/utf - \x{1e7fe} + \x{1e7fe} 0: \x{1e7fe} # Character not in script /^\p{Ethiopic}/utf - \x{1e7ff} + \x{1e7ff} No match # Base script check /^\p{sc=Cherokee}/utf - \x{13a0} + \x{13a0} 0: \x{13a0} /^\p{Script=Cher}/utf - \x{abbf} + \x{abbf} 0: \x{abbf} # Character not in script /^\p{Cherokee}/utf - \x{abc0} + \x{abc0} No match # Base script check /^\p{sc=Canadian_Aboriginal}/utf - \x{1400} + \x{1400} 0: \x{1400} /^\p{Script=Cans}/utf - \x{11abf} + \x{11abf} 0: \x{11abf} # Character not in script /^\p{Canadian_Aboriginal}/utf - \x{11ac0} + \x{11ac0} No match # Base script check /^\p{sc=Ogham}/utf - \x{1680} + \x{1680} 0: \x{1680} /^\p{Script=Ogam}/utf - \x{169c} + \x{169c} 0: \x{169c} # Character not in script /^\p{Ogham}/utf - \x{169d} + \x{169d} No match # Base script check /^\p{sc=Runic}/utf - \x{16a0} + \x{16a0} 0: \x{16a0} /^\p{Script=Runr}/utf - \x{16f8} + \x{16f8} 0: \x{16f8} # Character not in script /^\p{Runic}/utf - \x{16f9} + \x{16f9} No match # Base script check /^\p{sc=Khmer}/utf - \x{1780} + \x{1780} 0: \x{1780} /^\p{Script=Khmr}/utf - \x{19ff} + \x{19ff} 0: \x{19ff} # Character not in script /^\p{Khmer}/utf - \x{1a00} + \x{1a00} No match # Base script check /^\p{sc=Old_Italic}/utf - \x{10300} + \x{10300} 0: \x{10300} /^\p{Script=Ital}/utf - \x{1032f} + \x{1032f} 0: \x{1032f} # Character not in script /^\p{Old_Italic}/utf - \x{10330} + \x{10330} No match # Base script check /^\p{sc=Gothic}/utf - \x{10330} + \x{10330} 0: \x{10330} /^\p{Script=Goth}/utf - \x{1034a} + \x{1034a} 0: \x{1034a} # Character not in script /^\p{Gothic}/utf - \x{1034b} + \x{1034b} No match # Base script check /^\p{sc=Deseret}/utf - \x{10400} + \x{10400} 0: \x{10400} /^\p{Script=Dsrt}/utf - 
\x{1044f} + \x{1044f} 0: \x{1044f} # Character not in script /^\p{Deseret}/utf - \x{10450} + \x{10450} No match # Base script check /^\p{sc=Inherited}/utf - \x{300} + \x{300} 0: \x{300} /^\p{Script=Zinh}/utf - \x{e01ef} + \x{e01ef} 0: \x{e01ef} # Character not in script /^\p{Inherited}/utf - \x{e01f0} + \x{e01f0} No match # Base script check /^\p{sc=Ugaritic}/utf - \x{10380} + \x{10380} 0: \x{10380} /^\p{Script=Ugar}/utf - \x{1039f} + \x{1039f} 0: \x{1039f} # Character not in script /^\p{Ugaritic}/utf - \x{103a0} + \x{103a0} No match # Base script check /^\p{sc=Shavian}/utf - \x{10450} + \x{10450} 0: \x{10450} /^\p{Script=Shaw}/utf - \x{1047f} + \x{1047f} 0: \x{1047f} # Character not in script /^\p{Shavian}/utf - \x{10480} + \x{10480} No match # Base script check /^\p{sc=Osmanya}/utf - \x{10480} + \x{10480} 0: \x{10480} /^\p{Script=Osma}/utf - \x{104a9} + \x{104a9} 0: \x{104a9} # Character not in script /^\p{Osmanya}/utf - \x{104aa} + \x{104aa} No match # Base script check /^\p{sc=Braille}/utf - \x{2800} + \x{2800} 0: \x{2800} /^\p{Script=Brai}/utf - \x{28ff} + \x{28ff} 0: \x{28ff} # Character not in script /^\p{Braille}/utf - \x{2900} + \x{2900} No match # Base script check /^\p{sc=New_Tai_Lue}/utf - \x{1980} + \x{1980} 0: \x{1980} /^\p{Script=Talu}/utf - \x{19df} + \x{19df} 0: \x{19df} # Character not in script /^\p{New_Tai_Lue}/utf - \x{19e0} + \x{19e0} No match # Base script check /^\p{sc=Tifinagh}/utf - \x{2d30} + \x{2d30} 0: \x{2d30} /^\p{Script=Tfng}/utf - \x{2d7f} + \x{2d7f} 0: \x{2d7f} # Character not in script /^\p{Tifinagh}/utf - \x{2d80} + \x{2d80} No match # Base script check /^\p{sc=Old_Persian}/utf - \x{103a0} + \x{103a0} 0: \x{103a0} /^\p{Script=Xpeo}/utf - \x{103d5} + \x{103d5} 0: \x{103d5} # Character not in script /^\p{Old_Persian}/utf - \x{103d6} + \x{103d6} No match # Base script check /^\p{sc=Kharoshthi}/utf - \x{10a00} + \x{10a00} 0: \x{10a00} /^\p{Script=Khar}/utf - \x{10a58} + \x{10a58} 0: \x{10a58} # Character not in script 
/^\p{Kharoshthi}/utf - \x{10a59} + \x{10a59} No match # Base script check /^\p{sc=Balinese}/utf - \x{1b00} + \x{1b00} 0: \x{1b00} /^\p{Script=Bali}/utf - \x{1b7e} + \x{1b7e} 0: \x{1b7e} # Character not in script /^\p{Balinese}/utf - \x{1b7f} + \x{1b8f} No match # Base script check /^\p{sc=Cuneiform}/utf - \x{12000} + \x{12000} 0: \x{12000} /^\p{Script=Xsux}/utf - \x{12543} + \x{12543} 0: \x{12543} # Character not in script /^\p{Cuneiform}/utf - \x{12544} + \x{12544} No match # Base script check /^\p{sc=Phoenician}/utf - \x{10900} + \x{10900} 0: \x{10900} /^\p{Script=Phnx}/utf - \x{1091f} + \x{1091f} 0: \x{1091f} # Character not in script /^\p{Phoenician}/utf - \x{10920} + \x{10920} No match # Base script check /^\p{sc=Sundanese}/utf - \x{1b80} + \x{1b80} 0: \x{1b80} /^\p{Script=Sund}/utf - \x{1cc7} + \x{1cc7} 0: \x{1cc7} # Character not in script /^\p{Sundanese}/utf - \x{1cc8} + \x{1cc8} No match # Base script check /^\p{sc=Lepcha}/utf - \x{1c00} + \x{1c00} 0: \x{1c00} /^\p{Script=Lepc}/utf - \x{1c4f} + \x{1c4f} 0: \x{1c4f} # Character not in script /^\p{Lepcha}/utf - \x{1c50} + \x{1c50} No match # Base script check /^\p{sc=Ol_Chiki}/utf - \x{1c50} + \x{1c50} 0: \x{1c50} /^\p{Script=Olck}/utf - \x{1c7f} + \x{1c7f} 0: \x{1c7f} # Character not in script /^\p{Ol_Chiki}/utf - \x{1c80} + \x{1c80} No match # Base script check /^\p{sc=Vai}/utf - \x{a500} + \x{a500} 0: \x{a500} /^\p{Script=Vaii}/utf - \x{a62b} + \x{a62b} 0: \x{a62b} # Character not in script /^\p{Vai}/utf - \x{a62c} + \x{a62c} No match # Base script check /^\p{sc=Saurashtra}/utf - \x{a880} + \x{a880} 0: \x{a880} /^\p{Script=Saur}/utf - \x{a8d9} + \x{a8d9} 0: \x{a8d9} # Character not in script /^\p{Saurashtra}/utf - \x{a8da} + \x{a8da} No match # Base script check /^\p{sc=Rejang}/utf - \x{a930} + \x{a930} 0: \x{a930} /^\p{Script=Rjng}/utf - \x{a95f} + \x{a95f} 0: \x{a95f} # Character not in script /^\p{Rejang}/utf - \x{a960} + \x{a960} No match # Base script check /^\p{sc=Lycian}/utf - \x{10280} + \x{10280} 
0: \x{10280} /^\p{Script=Lyci}/utf - \x{1029c} + \x{1029c} 0: \x{1029c} # Character not in script /^\p{Lycian}/utf - \x{1029d} + \x{1029d} No match # Base script check /^\p{sc=Carian}/utf - \x{102a0} + \x{102a0} 0: \x{102a0} /^\p{Script=Cari}/utf - \x{102d0} + \x{102d0} 0: \x{102d0} # Character not in script /^\p{Carian}/utf - \x{102d1} + \x{102d1} No match # Base script check /^\p{sc=Lydian}/utf - \x{10920} + \x{10920} 0: \x{10920} /^\p{Script=Lydi}/utf - \x{1093f} + \x{1093f} 0: \x{1093f} # Character not in script /^\p{Lydian}/utf - \x{10940} + \x{10940} No match # Base script check /^\p{sc=Cham}/utf - \x{aa00} + \x{aa00} 0: \x{aa00} /^\p{Script=Cham}/utf - \x{aa5f} + \x{aa5f} 0: \x{aa5f} # Character not in script /^\p{Cham}/utf - \x{aa60} + \x{aa60} No match # Base script check /^\p{sc=Tai_Tham}/utf - \x{1a20} + \x{1a20} 0: \x{1a20} /^\p{Script=Lana}/utf - \x{1aad} + \x{1aad} 0: \x{1aad} # Character not in script /^\p{Tai_Tham}/utf - \x{1aae} + \x{1aae} No match # Base script check /^\p{sc=Tai_Viet}/utf - \x{aa80} + \x{aa80} 0: \x{aa80} /^\p{Script=Tavt}/utf - \x{aadf} + \x{aadf} 0: \x{aadf} # Character not in script /^\p{Tai_Viet}/utf - \x{aae0} + \x{aae0} No match # Base script check /^\p{sc=Avestan}/utf - \x{10b00} + \x{10b00} 0: \x{10b00} /^\p{Script=Avst}/utf - \x{10b3f} + \x{10b3f} 0: \x{10b3f} # Character not in script /^\p{Avestan}/utf - \x{10b40} + \x{10b40} No match # Base script check /^\p{sc=Egyptian_Hieroglyphs}/utf - \x{13000} + \x{13000} 0: \x{13000} /^\p{Script=Egyp}/utf - \x{13455} + \x{13455} 0: \x{13455} # Character not in script /^\p{Egyptian_Hieroglyphs}/utf - \x{13456} + \x{13456} No match # Base script check /^\p{sc=Samaritan}/utf - \x{800} + \x{800} 0: \x{800} /^\p{Script=Samr}/utf - \x{83e} + \x{83e} 0: \x{83e} # Character not in script /^\p{Samaritan}/utf - \x{83f} + \x{83f} No match # Base script check /^\p{sc=Lisu}/utf - \x{a4d0} + \x{a4d0} 0: \x{a4d0} /^\p{Script=Lisu}/utf - \x{11fb0} + \x{11fb0} 0: \x{11fb0} # Character not in 
script /^\p{Lisu}/utf - \x{11fb1} + \x{11fb1} No match # Base script check /^\p{sc=Bamum}/utf - \x{a6a0} + \x{a6a0} 0: \x{a6a0} /^\p{Script=Bamu}/utf - \x{16a38} + \x{16a38} 0: \x{16a38} # Character not in script /^\p{Bamum}/utf - \x{16a39} + \x{16a39} No match # Base script check /^\p{sc=Meetei_Mayek}/utf - \x{aae0} + \x{aae0} 0: \x{aae0} /^\p{Script=Mtei}/utf - \x{abf9} + \x{abf9} 0: \x{abf9} # Character not in script /^\p{Meetei_Mayek}/utf - \x{abfa} + \x{abfa} No match # Base script check /^\p{sc=Imperial_Aramaic}/utf - \x{10840} + \x{10840} 0: \x{10840} /^\p{Script=Armi}/utf - \x{1085f} + \x{1085f} 0: \x{1085f} # Character not in script /^\p{Imperial_Aramaic}/utf - \x{10860} + \x{10860} No match # Base script check /^\p{sc=Old_South_Arabian}/utf - \x{10a60} + \x{10a60} 0: \x{10a60} /^\p{Script=Sarb}/utf - \x{10a7f} + \x{10a7f} 0: \x{10a7f} # Character not in script /^\p{Old_South_Arabian}/utf - \x{10a80} + \x{10a80} No match # Base script check /^\p{sc=Inscriptional_Parthian}/utf - \x{10b40} + \x{10b40} 0: \x{10b40} /^\p{Script=Prti}/utf - \x{10b5f} + \x{10b5f} 0: \x{10b5f} # Character not in script /^\p{Inscriptional_Parthian}/utf - \x{10b60} + \x{10b60} No match # Base script check /^\p{sc=Inscriptional_Pahlavi}/utf - \x{10b60} + \x{10b60} 0: \x{10b60} /^\p{Script=Phli}/utf - \x{10b7f} + \x{10b7f} 0: \x{10b7f} # Character not in script /^\p{Inscriptional_Pahlavi}/utf - \x{10b80} + \x{10b80} No match # Base script check /^\p{sc=Old_Turkic}/utf - \x{10c00} + \x{10c00} 0: \x{10c00} /^\p{Script=Orkh}/utf - \x{10c48} + \x{10c48} 0: \x{10c48} # Character not in script /^\p{Old_Turkic}/utf - \x{10c49} + \x{10c49} No match # Base script check /^\p{sc=Batak}/utf - \x{1bc0} + \x{1bc0} 0: \x{1bc0} /^\p{Script=Batk}/utf - \x{1bff} + \x{1bff} 0: \x{1bff} # Character not in script /^\p{Batak}/utf - \x{1c00} + \x{1c00} No match # Base script check /^\p{sc=Brahmi}/utf - \x{11000} + \x{11000} 0: \x{11000} /^\p{Script=Brah}/utf - \x{1107f} + \x{1107f} 0: \x{1107f} # Character 
not in script /^\p{Brahmi}/utf - \x{11080} + \x{11080} No match # Base script check /^\p{sc=Meroitic_Cursive}/utf - \x{109a0} + \x{109a0} 0: \x{109a0} /^\p{Script=Merc}/utf - \x{109ff} + \x{109ff} 0: \x{109ff} # Character not in script /^\p{Meroitic_Cursive}/utf - \x{10a00} + \x{10a00} No match # Base script check /^\p{sc=Meroitic_Hieroglyphs}/utf - \x{10980} + \x{10980} 0: \x{10980} /^\p{Script=Mero}/utf - \x{1099f} + \x{1099f} 0: \x{1099f} # Character not in script /^\p{Meroitic_Hieroglyphs}/utf - \x{109a0} + \x{109a0} No match # Base script check /^\p{sc=Miao}/utf - \x{16f00} + \x{16f00} 0: \x{16f00} /^\p{Script=Plrd}/utf - \x{16f9f} + \x{16f9f} 0: \x{16f9f} # Character not in script /^\p{Miao}/utf - \x{16fa0} + \x{16fa0} No match # Base script check /^\p{sc=Sora_Sompeng}/utf - \x{110d0} + \x{110d0} 0: \x{110d0} /^\p{Script=Sora}/utf - \x{110f9} + \x{110f9} 0: \x{110f9} # Character not in script /^\p{Sora_Sompeng}/utf - \x{110fa} + \x{110fa} No match # Base script check /^\p{sc=Caucasian_Albanian}/utf - \x{10530} + \x{10530} 0: \x{10530} /^\p{Script=Aghb}/utf - \x{1056f} + \x{1056f} 0: \x{1056f} # Character not in script /^\p{Caucasian_Albanian}/utf - \x{10570} + \x{10570} No match # Base script check /^\p{sc=Bassa_Vah}/utf - \x{16ad0} + \x{16ad0} 0: \x{16ad0} /^\p{Script=Bass}/utf - \x{16af5} + \x{16af5} 0: \x{16af5} # Character not in script /^\p{Bassa_Vah}/utf - \x{16af6} + \x{16af6} No match # Base script check /^\p{sc=Elbasan}/utf - \x{10500} + \x{10500} 0: \x{10500} /^\p{Script=Elba}/utf - \x{10527} + \x{10527} 0: \x{10527} # Character not in script /^\p{Elbasan}/utf - \x{10528} + \x{10528} No match # Base script check /^\p{sc=Pahawh_Hmong}/utf - \x{16b00} + \x{16b00} 0: \x{16b00} /^\p{Script=Hmng}/utf - \x{16b8f} + \x{16b8f} 0: \x{16b8f} # Character not in script /^\p{Pahawh_Hmong}/utf - \x{16b90} + \x{16b90} No match # Base script check /^\p{sc=Mende_Kikakui}/utf - \x{1e800} + \x{1e800} 0: \x{1e800} /^\p{Script=Mend}/utf - \x{1e8d6} + \x{1e8d6} 0: 
\x{1e8d6} # Character not in script /^\p{Mende_Kikakui}/utf - \x{1e8d7} + \x{1e8d7} No match # Base script check /^\p{sc=Mro}/utf - \x{16a40} + \x{16a40} 0: \x{16a40} /^\p{Script=Mroo}/utf - \x{16a6f} + \x{16a6f} 0: \x{16a6f} # Character not in script /^\p{Mro}/utf - \x{16a70} + \x{16a70} No match # Base script check /^\p{sc=Old_North_Arabian}/utf - \x{10a80} + \x{10a80} 0: \x{10a80} /^\p{Script=Narb}/utf - \x{10a9f} + \x{10a9f} 0: \x{10a9f} # Character not in script /^\p{Old_North_Arabian}/utf - \x{10aa0} + \x{10aa0} No match # Base script check /^\p{sc=Nabataean}/utf - \x{10880} + \x{10880} 0: \x{10880} /^\p{Script=Nbat}/utf - \x{108af} + \x{108af} 0: \x{108af} # Character not in script /^\p{Nabataean}/utf - \x{108b0} + \x{108b0} No match # Base script check /^\p{sc=Palmyrene}/utf - \x{10860} + \x{10860} 0: \x{10860} /^\p{Script=Palm}/utf - \x{1087f} + \x{1087f} 0: \x{1087f} # Character not in script /^\p{Palmyrene}/utf - \x{10880} + \x{10880} No match # Base script check /^\p{sc=Pau_Cin_Hau}/utf - \x{11ac0} + \x{11ac0} 0: \x{11ac0} /^\p{Script=Pauc}/utf - \x{11af8} + \x{11af8} 0: \x{11af8} # Character not in script /^\p{Pau_Cin_Hau}/utf - \x{11af9} + \x{11af9} No match # Base script check /^\p{sc=Siddham}/utf - \x{11580} + \x{11580} 0: \x{11580} /^\p{Script=Sidd}/utf - \x{115dd} + \x{115dd} 0: \x{115dd} # Character not in script /^\p{Siddham}/utf - \x{115de} + \x{115de} No match # Base script check /^\p{sc=Warang_Citi}/utf - \x{118a0} + \x{118a0} 0: \x{118a0} /^\p{Script=Wara}/utf - \x{118ff} + \x{118ff} 0: \x{118ff} # Character not in script /^\p{Warang_Citi}/utf - \x{11900} + \x{11900} No match # Base script check /^\p{sc=Ahom}/utf - \x{11700} + \x{11700} 0: \x{11700} /^\p{Script=Ahom}/utf - \x{11746} + \x{11746} 0: \x{11746} # Character not in script /^\p{Ahom}/utf - \x{11747} + \x{11747} No match # Base script check /^\p{sc=Anatolian_Hieroglyphs}/utf - \x{14400} + \x{14400} 0: \x{14400} /^\p{Script=Hluw}/utf - \x{14646} + \x{14646} 0: \x{14646} # Character 
not in script /^\p{Anatolian_Hieroglyphs}/utf - \x{14647} + \x{14647} No match # Base script check /^\p{sc=Hatran}/utf - \x{108e0} + \x{108e0} 0: \x{108e0} /^\p{Script=Hatr}/utf - \x{108ff} + \x{108ff} 0: \x{108ff} # Character not in script /^\p{Hatran}/utf - \x{10900} + \x{10900} No match # Base script check /^\p{sc=Old_Hungarian}/utf - \x{10c80} + \x{10c80} 0: \x{10c80} /^\p{Script=Hung}/utf - \x{10cff} + \x{10cff} 0: \x{10cff} # Character not in script /^\p{Old_Hungarian}/utf - \x{10d00} + \x{10d00} No match # Base script check /^\p{sc=SignWriting}/utf - \x{1d800} + \x{1d800} 0: \x{1d800} /^\p{Script=Sgnw}/utf - \x{1daaf} + \x{1daaf} 0: \x{1daaf} # Character not in script /^\p{SignWriting}/utf - \x{1dab0} + \x{1dab0} No match # Base script check /^\p{sc=Bhaiksuki}/utf - \x{11c00} + \x{11c00} 0: \x{11c00} /^\p{Script=Bhks}/utf - \x{11c6c} + \x{11c6c} 0: \x{11c6c} # Character not in script /^\p{Bhaiksuki}/utf - \x{11c6d} + \x{11c6d} No match # Base script check /^\p{sc=Marchen}/utf - \x{11c70} + \x{11c70} 0: \x{11c70} /^\p{Script=Marc}/utf - \x{11cb6} + \x{11cb6} 0: \x{11cb6} # Character not in script /^\p{Marchen}/utf - \x{11cb7} + \x{11cb7} No match # Base script check /^\p{sc=Newa}/utf - \x{11400} + \x{11400} 0: \x{11400} /^\p{Script=Newa}/utf - \x{11461} + \x{11461} 0: \x{11461} # Character not in script /^\p{Newa}/utf - \x{11462} + \x{11462} No match # Base script check /^\p{sc=Osage}/utf - \x{104b0} + \x{104b0} 0: \x{104b0} /^\p{Script=Osge}/utf - \x{104fb} + \x{104fb} 0: \x{104fb} # Character not in script /^\p{Osage}/utf - \x{104fc} + \x{104fc} No match # Base script check /^\p{sc=Tangut}/utf - \x{16fe0} + \x{16fe0} 0: \x{16fe0} /^\p{Script=Tang}/utf - \x{18d08} + \x{18d08} 0: \x{18d08} # Character not in script /^\p{Tangut}/utf - \x{18d09} + \x{18d09} No match # Base script check /^\p{sc=Nushu}/utf - \x{16fe1} + \x{16fe1} 0: \x{16fe1} /^\p{Script=Nshu}/utf - \x{1b2fb} + \x{1b2fb} 0: \x{1b2fb} # Character not in script /^\p{Nushu}/utf - \x{1b2fc} + 
\x{1b2fc} No match # Base script check /^\p{sc=Soyombo}/utf - \x{11a50} + \x{11a50} 0: \x{11a50} /^\p{Script=Soyo}/utf - \x{11aa2} + \x{11aa2} 0: \x{11aa2} # Character not in script /^\p{Soyombo}/utf - \x{11aa3} + \x{11aa3} No match # Base script check /^\p{sc=Zanabazar_Square}/utf - \x{11a00} + \x{11a00} 0: \x{11a00} /^\p{Script=Zanb}/utf - \x{11a47} + \x{11a47} 0: \x{11a47} # Character not in script /^\p{Zanabazar_Square}/utf - \x{11a48} + \x{11a48} No match # Base script check /^\p{sc=Makasar}/utf - \x{11ee0} + \x{11ee0} 0: \x{11ee0} /^\p{Script=Maka}/utf - \x{11ef8} + \x{11ef8} 0: \x{11ef8} # Character not in script /^\p{Makasar}/utf - \x{11ef9} + \x{11ef9} No match # Base script check /^\p{sc=Medefaidrin}/utf - \x{16e40} + \x{16e40} 0: \x{16e40} /^\p{Script=Medf}/utf - \x{16e9a} + \x{16e9a} 0: \x{16e9a} # Character not in script /^\p{Medefaidrin}/utf - \x{16e9b} + \x{16e9b} No match # Base script check /^\p{sc=Old_Sogdian}/utf - \x{10f00} + \x{10f00} 0: \x{10f00} /^\p{Script=Sogo}/utf - \x{10f27} + \x{10f27} 0: \x{10f27} # Character not in script /^\p{Old_Sogdian}/utf - \x{10f28} + \x{10f28} No match # Base script check /^\p{sc=Elymaic}/utf - \x{10fe0} + \x{10fe0} 0: \x{10fe0} /^\p{Script=Elym}/utf - \x{10ff6} + \x{10ff6} 0: \x{10ff6} # Character not in script /^\p{Elymaic}/utf - \x{10ff7} + \x{10ff7} No match # Base script check /^\p{sc=Nyiakeng_Puachue_Hmong}/utf - \x{1e100} + \x{1e100} 0: \x{1e100} /^\p{Script=Hmnp}/utf - \x{1e14f} + \x{1e14f} 0: \x{1e14f} # Character not in script /^\p{Nyiakeng_Puachue_Hmong}/utf - \x{1e150} + \x{1e150} No match # Base script check /^\p{sc=Wancho}/utf - \x{1e2c0} + \x{1e2c0} 0: \x{1e2c0} /^\p{Script=Wcho}/utf - \x{1e2ff} + \x{1e2ff} 0: \x{1e2ff} # Character not in script /^\p{Wancho}/utf - \x{1e300} + \x{1e300} No match # Base script check /^\p{sc=Chorasmian}/utf - \x{10fb0} + \x{10fb0} 0: \x{10fb0} /^\p{Script=Chrs}/utf - \x{10fcb} + \x{10fcb} 0: \x{10fcb} # Character not in script /^\p{Chorasmian}/utf - \x{10fcc} + 
\x{10fcc} No match # Base script check /^\p{sc=Dives_Akuru}/utf - \x{11900} + \x{11900} 0: \x{11900} /^\p{Script=Diak}/utf - \x{11959} + \x{11959} 0: \x{11959} # Character not in script /^\p{Dives_Akuru}/utf - \x{1195a} + \x{1195a} No match # Base script check /^\p{sc=Khitan_Small_Script}/utf - \x{16fe4} + \x{16fe4} 0: \x{16fe4} /^\p{Script=Kits}/utf - \x{18cd5} + \x{18cd5} 0: \x{18cd5} # Character not in script /^\p{Khitan_Small_Script}/utf - \x{18cd6} + \x{18cd6} No match # Base script check /^\p{sc=Tangsa}/utf - \x{16a70} + \x{16a70} 0: \x{16a70} /^\p{Script=Tnsa}/utf - \x{16ac9} + \x{16ac9} 0: \x{16ac9} # Character not in script /^\p{Tangsa}/utf - \x{16aca} + \x{16aca} No match # Base script check /^\p{sc=Toto}/utf - \x{1e290} + \x{1e290} 0: \x{1e290} /^\p{Script=Toto}/utf - \x{1e2ae} + \x{1e2ae} 0: \x{1e2ae} # Character not in script /^\p{Toto}/utf - \x{1e2af} + \x{1e2af} No match # Base script check /^\p{sc=Vithkuqi}/utf - \x{10570} + \x{10570} 0: \x{10570} /^\p{Script=Vith}/utf - \x{105bc} + \x{105bc} 0: \x{105bc} # Character not in script /^\p{Vithkuqi}/utf - \x{105bd} + \x{105bd} No match # Base script check /^\p{sc=Kawi}/utf - \x{11f00} + \x{11f00} 0: \x{11f00} /^\p{Script=Kawi}/utf - \x{11f59} + \x{11f59} 0: \x{11f59} # Character not in script /^\p{Kawi}/utf - \x{11f5a} + \x{11f6a} No match # Base script check /^\p{sc=Nag_Mundari}/utf - \x{1e4d0} + \x{1e4d0} 0: \x{1e4d0} /^\p{Script=Nagm}/utf - \x{1e4f9} + \x{1e4f9} 0: \x{1e4f9} # Character not in script /^\p{Nag_Mundari}/utf - \x{1e4fa} + \x{1e4fa} No match # End of testinput26 diff --git a/testdata/testoutput27 b/testdata/testoutput27 new file mode 100644 index 0000000..9cf33bf --- /dev/null +++ b/testdata/testoutput27 @@ -0,0 +1,4153 @@ +# These tests were generated by maint/GenerateTest.py using PCRE2's UCP +# data, do not edit unless that data has changed and they are reflecting +# a previous version. 
+ +# Unicode Script Extension tests for version 16.0.0 + +#perltest + +# Base script check +/^\p{sc=Latin}/utf + A + 0: A + +/^\p{Script=Latn}/utf + \x{1df2a} + 0: \x{1df2a} + +# Script extension check +/^\p{Latin}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Latn}/utf + \x{a92e} + 0: \x{a92e} + +# Script extension only character +/^\p{Latin}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Latin}/utf + \x{b7} +No match + +# Character not in script +/^\p{Latin}/utf + \x{1df2b} +No match + +# Base script check +/^\p{sc=Greek}/utf + \x{370} + 0: \x{370} + +/^\p{Script=Grek}/utf + \x{1d245} + 0: \x{1d245} + +# Script extension check +/^\p{Greek}/utf + \x{b7} + 0: \x{b7} + +/^\p{Script_Extensions=Grek}/utf + \x{205d} + 0: \x{205d} + +# Script extension only character +/^\p{Greek}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Greek}/utf + \x{b7} +No match + +# Character not in script +/^\p{Greek}/utf + \x{1d246} +No match + +# Base script check +/^\p{sc=Cyrillic}/utf + \x{400} + 0: \x{400} + +/^\p{Script=Cyrl}/utf + \x{1e08f} + 0: \x{1e08f} + +# Script extension check +/^\p{Cyrillic}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{scx=Cyrl}/utf + \x{a66f} + 0: \x{a66f} + +# Script extension only character +/^\p{Cyrillic}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{sc=Cyrillic}/utf + \x{2bc} +No match + +# Character not in script +/^\p{Cyrillic}/utf + \x{1e090} +No match + +# Base script check +/^\p{sc=Armenian}/utf + \x{531} + 0: \x{531} + +/^\p{Script=Armn}/utf + \x{fb17} + 0: \x{fb17} + +# Script extension check +/^\p{Armenian}/utf + \x{308} + 0: \x{308} + +/^\p{Script_Extensions=Armn}/utf + \x{589} + 0: \x{589} + +# Script extension only character +/^\p{Armenian}/utf + \x{308} + 0: \x{308} + +/^\p{sc=Armenian}/utf + \x{308} +No match + +# Character not in script +/^\p{Armenian}/utf + \x{fb18} +No match + +# Base script check +/^\p{sc=Hebrew}/utf + \x{591} + 0: \x{591} + +/^\p{Script=Hebr}/utf + \x{fb4f} + 0: \x{fb4f} + +# Script extension check +/^\p{Hebrew}/utf + \x{307} + 0: \x{307} + +/^\p{scx=Hebr}/utf + \x{308} + 
0: \x{308} + +# Script extension only character +/^\p{Hebrew}/utf + \x{307} + 0: \x{307} + +/^\p{sc=Hebrew}/utf + \x{307} +No match + +# Character not in script +/^\p{Hebrew}/utf + \x{fb50} +No match + +# Base script check +/^\p{sc=Arabic}/utf + \x{600} + 0: \x{600} + +/^\p{Script=Arab}/utf + \x{1eef1} + 0: \x{1eef1} + +# Script extension check +/^\p{Arabic}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Arab}/utf + \x{102fb} + 0: \x{102fb} + +# Script extension only character +/^\p{Arabic}/utf + \x{60c} + 0: \x{60c} + +/^\p{sc=Arabic}/utf + \x{60c} +No match + +# Character not in script +/^\p{Arabic}/utf + \x{1eef2} +No match + +# Base script check +/^\p{sc=Syriac}/utf + \x{700} + 0: \x{700} + +/^\p{Script=Syrc}/utf + \x{86a} + 0: \x{86a} + +# Script extension check +/^\p{Syriac}/utf + \x{303} + 0: \x{303} + +/^\p{scx=Syrc}/utf + \x{1dfa} + 0: \x{1dfa} + +# Script extension only character +/^\p{Syriac}/utf + \x{303} + 0: \x{303} + +/^\p{sc=Syriac}/utf + \x{303} +No match + +# Character not in script +/^\p{Syriac}/utf + \x{1dfb} +No match + +# Base script check +/^\p{sc=Thaana}/utf + \x{780} + 0: \x{780} + +/^\p{Script=Thaa}/utf + \x{7b1} + 0: \x{7b1} + +# Script extension check +/^\p{Thaana}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Thaa}/utf + \x{fdfd} + 0: \x{fdfd} + +# Script extension only character +/^\p{Thaana}/utf + \x{60c} + 0: \x{60c} + +/^\p{sc=Thaana}/utf + \x{60c} +No match + +# Character not in script +/^\p{Thaana}/utf + \x{fdfe} +No match + +# Base script check +/^\p{sc=Devanagari}/utf + \x{900} + 0: \x{900} + +/^\p{Script=Deva}/utf + \x{11b09} + 0: \x{11b09} + +# Script extension check +/^\p{Devanagari}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{scx=Deva}/utf + \x{a8f3} + 0: \x{a8f3} + +# Script extension only character +/^\p{Devanagari}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{sc=Devanagari}/utf + \x{2bc} +No match + +# Character not in script +/^\p{Devanagari}/utf + \x{11b0a} +No match + +# Base script check +/^\p{sc=Bengali}/utf + \x{980} + 0: 
\x{980} + +/^\p{Script=Beng}/utf + \x{9fe} + 0: \x{9fe} + +# Script extension check +/^\p{Bengali}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{Script_Extensions=Beng}/utf + \x{a8f1} + 0: \x{a8f1} + +# Script extension only character +/^\p{Bengali}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{sc=Bengali}/utf + \x{2bc} +No match + +# Character not in script +/^\p{Bengali}/utf + \x{a8f2} +No match + +# Base script check +/^\p{sc=Gurmukhi}/utf + \x{a01} + 0: \x{a01} + +/^\p{Script=Guru}/utf + \x{a76} + 0: \x{a76} + +# Script extension check +/^\p{Gurmukhi}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Guru}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Gurmukhi}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Gurmukhi}/utf + \x{951} +No match + +# Character not in script +/^\p{Gurmukhi}/utf + \x{a83a} +No match + +# Base script check +/^\p{sc=Gujarati}/utf + \x{a81} + 0: \x{a81} + +/^\p{Script=Gujr}/utf + \x{aff} + 0: \x{aff} + +# Script extension check +/^\p{Gujarati}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Gujr}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Gujarati}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Gujarati}/utf + \x{951} +No match + +# Character not in script +/^\p{Gujarati}/utf + \x{a83a} +No match + +# Base script check +/^\p{sc=Oriya}/utf + \x{b01} + 0: \x{b01} + +/^\p{Script=Orya}/utf + \x{b77} + 0: \x{b77} + +# Script extension check +/^\p{Oriya}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Orya}/utf + \x{1cf2} + 0: \x{1cf2} + +# Script extension only character +/^\p{Oriya}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Oriya}/utf + \x{951} +No match + +# Character not in script +/^\p{Oriya}/utf + \x{1cf3} +No match + +# Base script check +/^\p{sc=Tamil}/utf + \x{b82} + 0: \x{b82} + +/^\p{Script=Taml}/utf + \x{11fff} + 0: \x{11fff} + +# Script extension check +/^\p{Tamil}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Taml}/utf + \x{11fd3} + 0: \x{11fd3} + +# Script extension only character +/^\p{Tamil}/utf + \x{951} + 0: \x{951} + 
+/^\p{sc=Tamil}/utf + \x{951} +No match + +# Character not in script +/^\p{Tamil}/utf + \x{12000} +No match + +# Base script check +/^\p{sc=Telugu}/utf + \x{c00} + 0: \x{c00} + +/^\p{Script=Telu}/utf + \x{c7f} + 0: \x{c7f} + +# Script extension check +/^\p{Telugu}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Telu}/utf + \x{1cf2} + 0: \x{1cf2} + +# Script extension only character +/^\p{Telugu}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Telugu}/utf + \x{951} +No match + +# Character not in script +/^\p{Telugu}/utf + \x{1cf3} +No match + +# Base script check +/^\p{sc=Kannada}/utf + \x{c80} + 0: \x{c80} + +/^\p{Script=Knda}/utf + \x{cf3} + 0: \x{cf3} + +# Script extension check +/^\p{Kannada}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Knda}/utf + \x{a835} + 0: \x{a835} + +# Script extension only character +/^\p{Kannada}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Kannada}/utf + \x{951} +No match + +# Character not in script +/^\p{Kannada}/utf + \x{a836} +No match + +# Base script check +/^\p{sc=Malayalam}/utf + \x{d00} + 0: \x{d00} + +/^\p{Script=Mlym}/utf + \x{d7f} + 0: \x{d7f} + +# Script extension check +/^\p{Malayalam}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Mlym}/utf + \x{a832} + 0: \x{a832} + +# Script extension only character +/^\p{Malayalam}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Malayalam}/utf + \x{951} +No match + +# Character not in script +/^\p{Malayalam}/utf + \x{a833} +No match + +# Base script check +/^\p{sc=Sinhala}/utf + \x{d81} + 0: \x{d81} + +/^\p{Script=Sinh}/utf + \x{111f4} + 0: \x{111f4} + +# Script extension check +/^\p{Sinhala}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Sinh}/utf + \x{1cf2} + 0: \x{1cf2} + +# Script extension only character +/^\p{Sinhala}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Sinhala}/utf + \x{964} +No match + +# Character not in script +/^\p{Sinhala}/utf + \x{111f5} +No match + +# Base script check +/^\p{sc=Thai}/utf + \x{e01} + 0: \x{e01} + +/^\p{Script=Thai}/utf + \x{e5b} + 0: \x{e5b} + +# Script extension check 
+/^\p{Thai}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{scx=Thai}/utf + \x{331} + 0: \x{331} + +# Script extension only character +/^\p{Thai}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{sc=Thai}/utf + \x{2bc} +No match + +# Character not in script +/^\p{Thai}/utf + \x{e5c} +No match + +# Base script check +/^\p{sc=Tibetan}/utf + \x{f00} + 0: \x{f00} + +/^\p{Script=Tibt}/utf + \x{fda} + 0: \x{fda} + +# Script extension check +/^\p{Tibetan}/utf + \x{3008} + 0: \x{3008} + +/^\p{Script_Extensions=Tibt}/utf + \x{300b} + 0: \x{300b} + +# Script extension only character +/^\p{Tibetan}/utf + \x{3008} + 0: \x{3008} + +/^\p{sc=Tibetan}/utf + \x{3008} +No match + +# Character not in script +/^\p{Tibetan}/utf + \x{300c} +No match + +# Base script check +/^\p{sc=Myanmar}/utf + \x{1000} + 0: \x{1000} + +/^\p{Script=Mymr}/utf + \x{116e3} + 0: \x{116e3} + +# Script extension check +/^\p{Myanmar}/utf + \x{1040} + 0: \x{1040} + +/^\p{scx=Mymr}/utf + \x{a92e} + 0: \x{a92e} + +# Script extension only character +/^\p{Myanmar}/utf + \x{a92e} + 0: \x{a92e} + +/^\p{sc=Myanmar}/utf + \x{a92e} +No match + +# Character not in script +/^\p{Myanmar}/utf + \x{116e4} +No match + +# Base script check +/^\p{sc=Georgian}/utf + \x{10a0} + 0: \x{10a0} + +/^\p{Script=Geor}/utf + \x{2d2d} + 0: \x{2d2d} + +# Script extension check +/^\p{Georgian}/utf + \x{b7} + 0: \x{b7} + +/^\p{Script_Extensions=Geor}/utf + \x{2e31} + 0: \x{2e31} + +# Script extension only character +/^\p{Georgian}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Georgian}/utf + \x{b7} +No match + +# Character not in script +/^\p{Georgian}/utf + \x{2e32} +No match + +# Base script check +/^\p{sc=Hangul}/utf + \x{1100} + 0: \x{1100} + +/^\p{Script=Hang}/utf + \x{ffdc} + 0: \x{ffdc} + +# Script extension check +/^\p{Hangul}/utf + \x{3001} + 0: \x{3001} + +/^\p{scx=Hang}/utf + \x{ff65} + 0: \x{ff65} + +# Script extension only character +/^\p{Hangul}/utf + \x{3001} + 0: \x{3001} + +/^\p{sc=Hangul}/utf + \x{3001} +No match + +# Character not in script +/^\p{Hangul}/utf + 
\x{ffdd} +No match + +# Base script check +/^\p{sc=Ethiopic}/utf + \x{1200} + 0: \x{1200} + +/^\p{Script=Ethi}/utf + \x{1e7fe} + 0: \x{1e7fe} + +# Script extension check +/^\p{Ethiopic}/utf + \x{30e} + 0: \x{30e} + +/^\p{Script_Extensions=Ethi}/utf + \x{30e} + 0: \x{30e} + +# Script extension only character +/^\p{Ethiopic}/utf + \x{30e} + 0: \x{30e} + +/^\p{sc=Ethiopic}/utf + \x{30e} +No match + +# Character not in script +/^\p{Ethiopic}/utf + \x{1e7ff} +No match + +# Base script check +/^\p{sc=Cherokee}/utf + \x{13a0} + 0: \x{13a0} + +/^\p{Script=Cher}/utf + \x{abbf} + 0: \x{abbf} + +# Script extension check +/^\p{Cherokee}/utf + \x{300} + 0: \x{300} + +/^\p{scx=Cher}/utf + \x{331} + 0: \x{331} + +# Script extension only character +/^\p{Cherokee}/utf + \x{300} + 0: \x{300} + +/^\p{sc=Cherokee}/utf + \x{300} +No match + +# Character not in script +/^\p{Cherokee}/utf + \x{abc0} +No match + +# Base script check +/^\p{sc=Runic}/utf + \x{16a0} + 0: \x{16a0} + +/^\p{Script=Runr}/utf + \x{16f8} + 0: \x{16f8} + +# Script extension check +/^\p{Runic}/utf + \x{16eb} + 0: \x{16eb} + +/^\p{Script_Extensions=Runr}/utf + \x{16ed} + 0: \x{16ed} + +# Script extension only character +/^\p{Runic}/utf + \x{16eb} + 0: \x{16eb} + +/^\p{sc=Runic}/utf + \x{16eb} +No match + +# Character not in script +/^\p{Runic}/utf + \x{16f9} +No match + +# Base script check +/^\p{sc=Mongolian}/utf + \x{1800} + 0: \x{1800} + +/^\p{Script=Mong}/utf + \x{1166c} + 0: \x{1166c} + +# Script extension check +/^\p{Mongolian}/utf + \x{1802} + 0: \x{1802} + +/^\p{scx=Mong}/utf + \x{300b} + 0: \x{300b} + +# Script extension only character +/^\p{Mongolian}/utf + \x{1802} + 0: \x{1802} + +/^\p{sc=Mongolian}/utf + \x{1802} +No match + +# Character not in script +/^\p{Mongolian}/utf + \x{1166d} +No match + +# Base script check +/^\p{sc=Hiragana}/utf + \x{3041} + 0: \x{3041} + +/^\p{Script=Hira}/utf + \x{1f200} + 0: \x{1f200} + +# Script extension check +/^\p{Hiragana}/utf + \x{3001} + 0: \x{3001} + 
+/^\p{Script_Extensions=Hira}/utf + \x{ff9f} + 0: \x{ff9f} + +# Script extension only character +/^\p{Hiragana}/utf + \x{3001} + 0: \x{3001} + +/^\p{sc=Hiragana}/utf + \x{3001} +No match + +# Character not in script +/^\p{Hiragana}/utf + \x{1f201} +No match + +# Base script check +/^\p{sc=Katakana}/utf + \x{30a1} + 0: \x{30a1} + +/^\p{Script=Kana}/utf + \x{1b167} + 0: \x{1b167} + +# Script extension check +/^\p{Katakana}/utf + \x{305} + 0: \x{305} + +/^\p{scx=Kana}/utf + \x{ff9f} + 0: \x{ff9f} + +# Script extension only character +/^\p{Katakana}/utf + \x{305} + 0: \x{305} + +/^\p{sc=Katakana}/utf + \x{305} +No match + +# Character not in script +/^\p{Katakana}/utf + \x{1b168} +No match + +# Base script check +/^\p{sc=Bopomofo}/utf + \x{2ea} + 0: \x{2ea} + +/^\p{Script=Bopo}/utf + \x{31bf} + 0: \x{31bf} + +# Script extension check +/^\p{Bopomofo}/utf + \x{2c7} + 0: \x{2c7} + +/^\p{Script_Extensions=Bopo}/utf + \x{ff65} + 0: \x{ff65} + +# Script extension only character +/^\p{Bopomofo}/utf + \x{2c7} + 0: \x{2c7} + +/^\p{sc=Bopomofo}/utf + \x{2c7} +No match + +# Character not in script +/^\p{Bopomofo}/utf + \x{ff66} +No match + +# Base script check +/^\p{sc=Han}/utf + \x{2e80} + 0: \x{2e80} + +/^\p{Script=Hani}/utf + \x{323af} + 0: \x{323af} + +# Script extension check +/^\p{Han}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Hani}/utf + \x{1f251} + 0: \x{1f251} + +# Script extension only character +/^\p{Han}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Han}/utf + \x{b7} +No match + +# Character not in script +/^\p{Han}/utf + \x{323b0} +No match + +# Base script check +/^\p{sc=Yi}/utf + \x{a000} + 0: \x{a000} + +/^\p{Script=Yiii}/utf + \x{a4c6} + 0: \x{a4c6} + +# Script extension check +/^\p{Yi}/utf + \x{3001} + 0: \x{3001} + +/^\p{Script_Extensions=Yiii}/utf + \x{ff65} + 0: \x{ff65} + +# Script extension only character +/^\p{Yi}/utf + \x{3001} + 0: \x{3001} + +/^\p{sc=Yi}/utf + \x{3001} +No match + +# Character not in script +/^\p{Yi}/utf + \x{ff66} +No match + +# Base script check 
+/^\p{sc=Gothic}/utf + \x{10330} + 0: \x{10330} + +/^\p{Script=Goth}/utf + \x{1034a} + 0: \x{1034a} + +# Script extension check +/^\p{Gothic}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Goth}/utf + \x{331} + 0: \x{331} + +# Script extension only character +/^\p{Gothic}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Gothic}/utf + \x{b7} +No match + +# Character not in script +/^\p{Gothic}/utf + \x{1034b} +No match + +# Base script check +/^\p{sc=Tagalog}/utf + \x{1700} + 0: \x{1700} + +/^\p{Script=Tglg}/utf + \x{171f} + 0: \x{171f} + +# Script extension check +/^\p{Tagalog}/utf + \x{1735} + 0: \x{1735} + +/^\p{Script_Extensions=Tglg}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character +/^\p{Tagalog}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Tagalog}/utf + \x{1735} +No match + +# Character not in script +/^\p{Tagalog}/utf + \x{1737} +No match + +# Base script check +/^\p{sc=Hanunoo}/utf + \x{1720} + 0: \x{1720} + +/^\p{Script=Hano}/utf + \x{1734} + 0: \x{1734} + +# Script extension check +/^\p{Hanunoo}/utf + \x{1735} + 0: \x{1735} + +/^\p{scx=Hano}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character +/^\p{Hanunoo}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Hanunoo}/utf + \x{1735} +No match + +# Character not in script +/^\p{Hanunoo}/utf + \x{1737} +No match + +# Base script check +/^\p{sc=Buhid}/utf + \x{1740} + 0: \x{1740} + +/^\p{Script=Buhd}/utf + \x{1753} + 0: \x{1753} + +# Script extension check +/^\p{Buhid}/utf + \x{1735} + 0: \x{1735} + +/^\p{Script_Extensions=Buhd}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character +/^\p{Buhid}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Buhid}/utf + \x{1735} +No match + +# Character not in script +/^\p{Buhid}/utf + \x{1754} +No match + +# Base script check +/^\p{sc=Tagbanwa}/utf + \x{1760} + 0: \x{1760} + +/^\p{Script=Tagb}/utf + \x{1773} + 0: \x{1773} + +# Script extension check +/^\p{Tagbanwa}/utf + \x{1735} + 0: \x{1735} + +/^\p{scx=Tagb}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character 
+/^\p{Tagbanwa}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Tagbanwa}/utf + \x{1735} +No match + +# Character not in script +/^\p{Tagbanwa}/utf + \x{1774} +No match + +# Base script check +/^\p{sc=Limbu}/utf + \x{1900} + 0: \x{1900} + +/^\p{Script=Limb}/utf + \x{194f} + 0: \x{194f} + +# Script extension check +/^\p{Limbu}/utf + \x{965} + 0: \x{965} + +/^\p{Script_Extensions=Limb}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Limbu}/utf + \x{965} + 0: \x{965} + +/^\p{sc=Limbu}/utf + \x{965} +No match + +# Character not in script +/^\p{Limbu}/utf + \x{1950} +No match + +# Base script check +/^\p{sc=Tai_Le}/utf + \x{1950} + 0: \x{1950} + +/^\p{Script=Tale}/utf + \x{1974} + 0: \x{1974} + +# Script extension check +/^\p{Tai_Le}/utf + \x{300} + 0: \x{300} + +/^\p{scx=Tale}/utf + \x{1049} + 0: \x{1049} + +# Script extension only character +/^\p{Tai_Le}/utf + \x{300} + 0: \x{300} + +/^\p{sc=Tai_Le}/utf + \x{300} +No match + +# Character not in script +/^\p{Tai_Le}/utf + \x{1975} +No match + +# Base script check +/^\p{sc=Linear_B}/utf + \x{10000} + 0: \x{10000} + +/^\p{Script=Linb}/utf + \x{100fa} + 0: \x{100fa} + +# Script extension check +/^\p{Linear_B}/utf + \x{10100} + 0: \x{10100} + +/^\p{Script_Extensions=Linb}/utf + \x{1013f} + 0: \x{1013f} + +# Script extension only character +/^\p{Linear_B}/utf + \x{10100} + 0: \x{10100} + +/^\p{sc=Linear_B}/utf + \x{10100} +No match + +# Character not in script +/^\p{Linear_B}/utf + \x{10140} +No match + +# Base script check +/^\p{sc=Shavian}/utf + \x{10450} + 0: \x{10450} + +/^\p{Script=Shaw}/utf + \x{1047f} + 0: \x{1047f} + +# Script extension check +/^\p{Shavian}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Shaw}/utf + \x{b7} + 0: \x{b7} + +# Script extension only character +/^\p{Shavian}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Shavian}/utf + \x{b7} +No match + +# Character not in script +/^\p{Shavian}/utf + \x{10480} +No match + +# Base script check +/^\p{sc=Cypriot}/utf + \x{10800} + 0: \x{10800} + 
+/^\p{Script=Cprt}/utf + \x{1083f} + 0: \x{1083f} + +# Script extension check +/^\p{Cypriot}/utf + \x{10100} + 0: \x{10100} + +/^\p{Script_Extensions=Cprt}/utf + \x{1013f} + 0: \x{1013f} + +# Script extension only character +/^\p{Cypriot}/utf + \x{10100} + 0: \x{10100} + +/^\p{sc=Cypriot}/utf + \x{10100} +No match + +# Character not in script +/^\p{Cypriot}/utf + \x{10840} +No match + +# Base script check +/^\p{sc=Buginese}/utf + \x{1a00} + 0: \x{1a00} + +/^\p{Script=Bugi}/utf + \x{1a1f} + 0: \x{1a1f} + +# Script extension check +/^\p{Buginese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{scx=Bugi}/utf + \x{a9cf} + 0: \x{a9cf} + +# Script extension only character +/^\p{Buginese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{sc=Buginese}/utf + \x{a9cf} +No match + +# Character not in script +/^\p{Buginese}/utf + \x{a9d0} +No match + +# Base script check +/^\p{sc=Coptic}/utf + \x{3e2} + 0: \x{3e2} + +/^\p{Script=Copt}/utf + \x{2cff} + 0: \x{2cff} + +# Script extension check +/^\p{Coptic}/utf + \x{b7} + 0: \x{b7} + +/^\p{Script_Extensions=Copt}/utf + \x{102fb} + 0: \x{102fb} + +# Script extension only character +/^\p{Coptic}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Coptic}/utf + \x{b7} +No match + +# Character not in script +/^\p{Coptic}/utf + \x{102fc} +No match + +# Base script check +/^\p{sc=Glagolitic}/utf + \x{2c00} + 0: \x{2c00} + +/^\p{Script=Glag}/utf + \x{1e02a} + 0: \x{1e02a} + +# Script extension check +/^\p{Glagolitic}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Glag}/utf + \x{a66f} + 0: \x{a66f} + +# Script extension only character +/^\p{Glagolitic}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Glagolitic}/utf + \x{b7} +No match + +# Character not in script +/^\p{Glagolitic}/utf + \x{1e02b} +No match + +# Base script check +/^\p{sc=Tifinagh}/utf + \x{2d30} + 0: \x{2d30} + +/^\p{Script=Tfng}/utf + \x{2d7f} + 0: \x{2d7f} + +# Script extension check +/^\p{Tifinagh}/utf + \x{302} + 0: \x{302} + +/^\p{Script_Extensions=Tfng}/utf + \x{309} + 0: \x{309} + +# Script extension only character 
+/^\p{Tifinagh}/utf + \x{302} + 0: \x{302} + +/^\p{sc=Tifinagh}/utf + \x{302} +No match + +# Character not in script +/^\p{Tifinagh}/utf + \x{2d80} +No match + +# Base script check +/^\p{sc=Syloti_Nagri}/utf + \x{a800} + 0: \x{a800} + +/^\p{Script=Sylo}/utf + \x{a82c} + 0: \x{a82c} + +# Script extension check +/^\p{Syloti_Nagri}/utf + \x{964} + 0: \x{964} + +/^\p{scx=Sylo}/utf + \x{9ef} + 0: \x{9ef} + +# Script extension only character +/^\p{Syloti_Nagri}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Syloti_Nagri}/utf + \x{964} +No match + +# Character not in script +/^\p{Syloti_Nagri}/utf + \x{a82d} +No match + +# Base script check +/^\p{sc=Phags_Pa}/utf + \x{a840} + 0: \x{a840} + +/^\p{Script=Phag}/utf + \x{a877} + 0: \x{a877} + +# Script extension check +/^\p{Phags_Pa}/utf + \x{1802} + 0: \x{1802} + +/^\p{Script_Extensions=Phag}/utf + \x{3002} + 0: \x{3002} + +# Script extension only character +/^\p{Phags_Pa}/utf + \x{1802} + 0: \x{1802} + +/^\p{sc=Phags_Pa}/utf + \x{1802} +No match + +# Character not in script +/^\p{Phags_Pa}/utf + \x{a878} +No match + +# Base script check +/^\p{sc=Nko}/utf + \x{7c0} + 0: \x{7c0} + +/^\p{Script=Nkoo}/utf + \x{7ff} + 0: \x{7ff} + +# Script extension check +/^\p{Nko}/utf + \x{60c} + 0: \x{60c} + +/^\p{scx=Nkoo}/utf + \x{fd3f} + 0: \x{fd3f} + +# Script extension only character +/^\p{Nko}/utf + \x{60c} + 0: \x{60c} + +/^\p{sc=Nko}/utf + \x{60c} +No match + +# Character not in script +/^\p{Nko}/utf + \x{fd40} +No match + +# Base script check +/^\p{sc=Kayah_Li}/utf + \x{a900} + 0: \x{a900} + +/^\p{Script=Kali}/utf + \x{a92f} + 0: \x{a92f} + +# Script extension check +/^\p{Kayah_Li}/utf + \x{a92e} + 0: \x{a92e} + +/^\p{Script_Extensions=Kali}/utf + \x{a92e} + 0: \x{a92e} + +# Script extension only character +/^\p{Kayah_Li}/utf + \x{a92e} + 0: \x{a92e} + +/^\p{sc=Kayah_Li}/utf + \x{a92e} +No match + +# Character not in script +/^\p{Kayah_Li}/utf + \x{a930} +No match + +# Base script check +/^\p{sc=Lycian}/utf + \x{10280} + 0: \x{10280} + 
+/^\p{Script=Lyci}/utf + \x{1029c} + 0: \x{1029c} + +# Script extension check +/^\p{Lycian}/utf + \x{205a} + 0: \x{205a} + +/^\p{scx=Lyci}/utf + \x{205a} + 0: \x{205a} + +# Script extension only character +/^\p{Lycian}/utf + \x{205a} + 0: \x{205a} + +/^\p{sc=Lycian}/utf + \x{205a} +No match + +# Character not in script +/^\p{Lycian}/utf + \x{1029d} +No match + +# Base script check +/^\p{sc=Carian}/utf + \x{102a0} + 0: \x{102a0} + +/^\p{Script=Cari}/utf + \x{102d0} + 0: \x{102d0} + +# Script extension check +/^\p{Carian}/utf + \x{b7} + 0: \x{b7} + +/^\p{Script_Extensions=Cari}/utf + \x{2e31} + 0: \x{2e31} + +# Script extension only character +/^\p{Carian}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Carian}/utf + \x{b7} +No match + +# Character not in script +/^\p{Carian}/utf + \x{102d1} +No match + +# Base script check +/^\p{sc=Lydian}/utf + \x{10920} + 0: \x{10920} + +/^\p{Script=Lydi}/utf + \x{1093f} + 0: \x{1093f} + +# Script extension check +/^\p{Lydian}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Lydi}/utf + \x{2e31} + 0: \x{2e31} + +# Script extension only character +/^\p{Lydian}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Lydian}/utf + \x{b7} +No match + +# Character not in script +/^\p{Lydian}/utf + \x{10940} +No match + +# Base script check +/^\p{sc=Avestan}/utf + \x{10b00} + 0: \x{10b00} + +/^\p{Script=Avst}/utf + \x{10b3f} + 0: \x{10b3f} + +# Script extension check +/^\p{Avestan}/utf + \x{b7} + 0: \x{b7} + +/^\p{Script_Extensions=Avst}/utf + \x{2e31} + 0: \x{2e31} + +# Script extension only character +/^\p{Avestan}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Avestan}/utf + \x{b7} +No match + +# Character not in script +/^\p{Avestan}/utf + \x{10b40} +No match + +# Base script check +/^\p{sc=Samaritan}/utf + \x{800} + 0: \x{800} + +/^\p{Script=Samr}/utf + \x{83e} + 0: \x{83e} + +# Script extension check +/^\p{Samaritan}/utf + \x{2e31} + 0: \x{2e31} + +/^\p{scx=Samr}/utf + \x{2e31} + 0: \x{2e31} + +# Script extension only character +/^\p{Samaritan}/utf + \x{2e31} + 0: \x{2e31} + 
+/^\p{sc=Samaritan}/utf + \x{2e31} +No match + +# Character not in script +/^\p{Samaritan}/utf + \x{2e32} +No match + +# Base script check +/^\p{sc=Lisu}/utf + \x{a4d0} + 0: \x{a4d0} + +/^\p{Script=Lisu}/utf + \x{11fb0} + 0: \x{11fb0} + +# Script extension check +/^\p{Lisu}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{Script_Extensions=Lisu}/utf + \x{300b} + 0: \x{300b} + +# Script extension only character +/^\p{Lisu}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{sc=Lisu}/utf + \x{2bc} +No match + +# Character not in script +/^\p{Lisu}/utf + \x{11fb1} +No match + +# Base script check +/^\p{sc=Javanese}/utf + \x{a980} + 0: \x{a980} + +/^\p{Script=Java}/utf + \x{a9df} + 0: \x{a9df} + +# Script extension check +/^\p{Javanese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{scx=Java}/utf + \x{a9cf} + 0: \x{a9cf} + +# Script extension only character +/^\p{Javanese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{sc=Javanese}/utf + \x{a9cf} +No match + +# Character not in script +/^\p{Javanese}/utf + \x{a9e0} +No match + +# Base script check +/^\p{sc=Old_Turkic}/utf + \x{10c00} + 0: \x{10c00} + +/^\p{Script=Orkh}/utf + \x{10c48} + 0: \x{10c48} + +# Script extension check +/^\p{Old_Turkic}/utf + \x{205a} + 0: \x{205a} + +/^\p{Script_Extensions=Orkh}/utf + \x{2e30} + 0: \x{2e30} + +# Script extension only character +/^\p{Old_Turkic}/utf + \x{205a} + 0: \x{205a} + +/^\p{sc=Old_Turkic}/utf + \x{205a} +No match + +# Character not in script +/^\p{Old_Turkic}/utf + \x{10c49} +No match + +# Base script check +/^\p{sc=Kaithi}/utf + \x{11080} + 0: \x{11080} + +/^\p{Script=Kthi}/utf + \x{110cd} + 0: \x{110cd} + +# Script extension check +/^\p{Kaithi}/utf + \x{966} + 0: \x{966} + +/^\p{scx=Kthi}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Kaithi}/utf + \x{966} + 0: \x{966} + +/^\p{sc=Kaithi}/utf + \x{966} +No match + +# Character not in script +/^\p{Kaithi}/utf + \x{110ce} +No match + +# Base script check +/^\p{sc=Mandaic}/utf + \x{840} + 0: \x{840} + +/^\p{Script=Mand}/utf + \x{85e} + 0: \x{85e} 
+ +# Script extension check +/^\p{Mandaic}/utf + \x{640} + 0: \x{640} + +/^\p{Script_Extensions=Mand}/utf + \x{640} + 0: \x{640} + +# Script extension only character +/^\p{Mandaic}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Mandaic}/utf + \x{640} +No match + +# Character not in script +/^\p{Mandaic}/utf + \x{85f} +No match + +# Base script check +/^\p{sc=Chakma}/utf + \x{11100} + 0: \x{11100} + +/^\p{Script=Cakm}/utf + \x{11147} + 0: \x{11147} + +# Script extension check +/^\p{Chakma}/utf + \x{9e6} + 0: \x{9e6} + +/^\p{scx=Cakm}/utf + \x{1049} + 0: \x{1049} + +# Script extension only character +/^\p{Chakma}/utf + \x{9e6} + 0: \x{9e6} + +/^\p{sc=Chakma}/utf + \x{9e6} +No match + +# Character not in script +/^\p{Chakma}/utf + \x{11148} +No match + +# Base script check +/^\p{sc=Meroitic_Hieroglyphs}/utf + \x{10980} + 0: \x{10980} + +/^\p{Script=Mero}/utf + \x{1099f} + 0: \x{1099f} + +# Script extension check +/^\p{Meroitic_Hieroglyphs}/utf + \x{205d} + 0: \x{205d} + +/^\p{Script_Extensions=Mero}/utf + \x{205d} + 0: \x{205d} + +# Script extension only character +/^\p{Meroitic_Hieroglyphs}/utf + \x{205d} + 0: \x{205d} + +/^\p{sc=Meroitic_Hieroglyphs}/utf + \x{205d} +No match + +# Character not in script +/^\p{Meroitic_Hieroglyphs}/utf + \x{109a0} +No match + +# Base script check +/^\p{sc=Sharada}/utf + \x{11180} + 0: \x{11180} + +/^\p{Script=Shrd}/utf + \x{111df} + 0: \x{111df} + +# Script extension check +/^\p{Sharada}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Shrd}/utf + \x{a838} + 0: \x{a838} + +# Script extension only character +/^\p{Sharada}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Sharada}/utf + \x{951} +No match + +# Character not in script +/^\p{Sharada}/utf + \x{111e0} +No match + +# Base script check +/^\p{sc=Takri}/utf + \x{11680} + 0: \x{11680} + +/^\p{Script=Takr}/utf + \x{116c9} + 0: \x{116c9} + +# Script extension check +/^\p{Takri}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Takr}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character 
+/^\p{Takri}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Takri}/utf + \x{964} +No match + +# Character not in script +/^\p{Takri}/utf + \x{116ca} +No match + +# Base script check +/^\p{sc=Caucasian_Albanian}/utf + \x{10530} + 0: \x{10530} + +/^\p{Script=Aghb}/utf + \x{1056f} + 0: \x{1056f} + +# Script extension check +/^\p{Caucasian_Albanian}/utf + \x{304} + 0: \x{304} + +/^\p{scx=Aghb}/utf + \x{35e} + 0: \x{35e} + +# Script extension only character +/^\p{Caucasian_Albanian}/utf + \x{304} + 0: \x{304} + +/^\p{sc=Caucasian_Albanian}/utf + \x{304} +No match + +# Character not in script +/^\p{Caucasian_Albanian}/utf + \x{10570} +No match + +# Base script check +/^\p{sc=Duployan}/utf + \x{1bc00} + 0: \x{1bc00} + +/^\p{Script=Dupl}/utf + \x{1bc9f} + 0: \x{1bc9f} + +# Script extension check +/^\p{Duployan}/utf + \x{b7} + 0: \x{b7} + +/^\p{Script_Extensions=Dupl}/utf + \x{1bca3} + 0: \x{1bca3} + +# Script extension only character +/^\p{Duployan}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Duployan}/utf + \x{b7} +No match + +# Character not in script +/^\p{Duployan}/utf + \x{1bca4} +No match + +# Base script check +/^\p{sc=Elbasan}/utf + \x{10500} + 0: \x{10500} + +/^\p{Script=Elba}/utf + \x{10527} + 0: \x{10527} + +# Script extension check +/^\p{Elbasan}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Elba}/utf + \x{305} + 0: \x{305} + +# Script extension only character +/^\p{Elbasan}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Elbasan}/utf + \x{b7} +No match + +# Character not in script +/^\p{Elbasan}/utf + \x{10528} +No match + +# Base script check +/^\p{sc=Grantha}/utf + \x{11300} + 0: \x{11300} + +/^\p{Script=Gran}/utf + \x{11374} + 0: \x{11374} + +# Script extension check +/^\p{Grantha}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Gran}/utf + \x{11fd3} + 0: \x{11fd3} + +# Script extension only character +/^\p{Grantha}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Grantha}/utf + \x{951} +No match + +# Character not in script +/^\p{Grantha}/utf + \x{11fd4} +No match + +# Base script check 
+/^\p{sc=Khojki}/utf + \x{11200} + 0: \x{11200} + +/^\p{Script=Khoj}/utf + \x{11241} + 0: \x{11241} + +# Script extension check +/^\p{Khojki}/utf + \x{ae6} + 0: \x{ae6} + +/^\p{scx=Khoj}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Khojki}/utf + \x{ae6} + 0: \x{ae6} + +/^\p{sc=Khojki}/utf + \x{ae6} +No match + +# Character not in script +/^\p{Khojki}/utf + \x{11242} +No match + +# Base script check +/^\p{sc=Linear_A}/utf + \x{10600} + 0: \x{10600} + +/^\p{Script=Lina}/utf + \x{10767} + 0: \x{10767} + +# Script extension check +/^\p{Linear_A}/utf + \x{10107} + 0: \x{10107} + +/^\p{Script_Extensions=Lina}/utf + \x{10133} + 0: \x{10133} + +# Script extension only character +/^\p{Linear_A}/utf + \x{10107} + 0: \x{10107} + +/^\p{sc=Linear_A}/utf + \x{10107} +No match + +# Character not in script +/^\p{Linear_A}/utf + \x{10768} +No match + +# Base script check +/^\p{sc=Mahajani}/utf + \x{11150} + 0: \x{11150} + +/^\p{Script=Mahj}/utf + \x{11176} + 0: \x{11176} + +# Script extension check +/^\p{Mahajani}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Mahj}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Mahajani}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Mahajani}/utf + \x{b7} +No match + +# Character not in script +/^\p{Mahajani}/utf + \x{11177} +No match + +# Base script check +/^\p{sc=Manichaean}/utf + \x{10ac0} + 0: \x{10ac0} + +/^\p{Script=Mani}/utf + \x{10af6} + 0: \x{10af6} + +# Script extension check +/^\p{Manichaean}/utf + \x{640} + 0: \x{640} + +/^\p{Script_Extensions=Mani}/utf + \x{10af2} + 0: \x{10af2} + +# Script extension only character +/^\p{Manichaean}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Manichaean}/utf + \x{640} +No match + +# Character not in script +/^\p{Manichaean}/utf + \x{10af7} +No match + +# Base script check +/^\p{sc=Modi}/utf + \x{11600} + 0: \x{11600} + +/^\p{Script=Modi}/utf + \x{11659} + 0: \x{11659} + +# Script extension check +/^\p{Modi}/utf + \x{a830} + 0: \x{a830} + +/^\p{scx=Modi}/utf + \x{a839} + 0: 
\x{a839} + +# Script extension only character +/^\p{Modi}/utf + \x{a830} + 0: \x{a830} + +/^\p{sc=Modi}/utf + \x{a830} +No match + +# Character not in script +/^\p{Modi}/utf + \x{1165a} +No match + +# Base script check +/^\p{sc=Old_Permic}/utf + \x{10350} + 0: \x{10350} + +/^\p{Script=Perm}/utf + \x{1037a} + 0: \x{1037a} + +# Script extension check +/^\p{Old_Permic}/utf + \x{b7} + 0: \x{b7} + +/^\p{Script_Extensions=Perm}/utf + \x{483} + 0: \x{483} + +# Script extension only character +/^\p{Old_Permic}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Old_Permic}/utf + \x{b7} +No match + +# Character not in script +/^\p{Old_Permic}/utf + \x{1037b} +No match + +# Base script check +/^\p{sc=Psalter_Pahlavi}/utf + \x{10b80} + 0: \x{10b80} + +/^\p{Script=Phlp}/utf + \x{10baf} + 0: \x{10baf} + +# Script extension check +/^\p{Psalter_Pahlavi}/utf + \x{640} + 0: \x{640} + +/^\p{scx=Phlp}/utf + \x{640} + 0: \x{640} + +# Script extension only character +/^\p{Psalter_Pahlavi}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Psalter_Pahlavi}/utf + \x{640} +No match + +# Character not in script +/^\p{Psalter_Pahlavi}/utf + \x{10bb0} +No match + +# Base script check +/^\p{sc=Khudawadi}/utf + \x{112b0} + 0: \x{112b0} + +/^\p{Script=Sind}/utf + \x{112f9} + 0: \x{112f9} + +# Script extension check +/^\p{Khudawadi}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Sind}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Khudawadi}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Khudawadi}/utf + \x{964} +No match + +# Character not in script +/^\p{Khudawadi}/utf + \x{112fa} +No match + +# Base script check +/^\p{sc=Tirhuta}/utf + \x{11480} + 0: \x{11480} + +/^\p{Script=Tirh}/utf + \x{114d9} + 0: \x{114d9} + +# Script extension check +/^\p{Tirhuta}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Tirh}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Tirhuta}/utf + \x{951} + 0: \x{951} + +/^\p{sc=Tirhuta}/utf + \x{951} +No match + +# Character not in script +/^\p{Tirhuta}/utf + 
\x{114da} +No match + +# Base script check +/^\p{sc=Multani}/utf + \x{11280} + 0: \x{11280} + +/^\p{Script=Mult}/utf + \x{112a9} + 0: \x{112a9} + +# Script extension check +/^\p{Multani}/utf + \x{a66} + 0: \x{a66} + +/^\p{Script_Extensions=Mult}/utf + \x{a6f} + 0: \x{a6f} + +# Script extension only character +/^\p{Multani}/utf + \x{a66} + 0: \x{a66} + +/^\p{sc=Multani}/utf + \x{a66} +No match + +# Character not in script +/^\p{Multani}/utf + \x{112aa} +No match + +# Base script check +/^\p{sc=Old_Hungarian}/utf + \x{10c80} + 0: \x{10c80} + +/^\p{Script=Hung}/utf + \x{10cff} + 0: \x{10cff} + +# Script extension check +/^\p{Old_Hungarian}/utf + \x{205a} + 0: \x{205a} + +/^\p{scx=Hung}/utf + \x{2e41} + 0: \x{2e41} + +# Script extension only character +/^\p{Old_Hungarian}/utf + \x{205a} + 0: \x{205a} + +/^\p{sc=Old_Hungarian}/utf + \x{205a} +No match + +# Character not in script +/^\p{Old_Hungarian}/utf + \x{10d00} +No match + +# Base script check +/^\p{sc=Adlam}/utf + \x{1e900} + 0: \x{1e900} + +/^\p{Script=Adlm}/utf + \x{1e95f} + 0: \x{1e95f} + +# Script extension check +/^\p{Adlam}/utf + \x{61f} + 0: \x{61f} + +/^\p{Script_Extensions=Adlm}/utf + \x{2e41} + 0: \x{2e41} + +# Script extension only character +/^\p{Adlam}/utf + \x{61f} + 0: \x{61f} + +/^\p{sc=Adlam}/utf + \x{61f} +No match + +# Character not in script +/^\p{Adlam}/utf + \x{1e960} +No match + +# Base script check +/^\p{sc=Osage}/utf + \x{104b0} + 0: \x{104b0} + +/^\p{Script=Osge}/utf + \x{104fb} + 0: \x{104fb} + +# Script extension check +/^\p{Osage}/utf + \x{301} + 0: \x{301} + +/^\p{scx=Osge}/utf + \x{358} + 0: \x{358} + +# Script extension only character +/^\p{Osage}/utf + \x{301} + 0: \x{301} + +/^\p{sc=Osage}/utf + \x{301} +No match + +# Character not in script +/^\p{Osage}/utf + \x{104fc} +No match + +# Base script check +/^\p{sc=Tangut}/utf + \x{16fe0} + 0: \x{16fe0} + +/^\p{Script=Tang}/utf + \x{18d08} + 0: \x{18d08} + +# Script extension check +/^\p{Tangut}/utf + \x{2ff0} + 0: \x{2ff0} + 
+/^\p{Script_Extensions=Tang}/utf + \x{31ef} + 0: \x{31ef} + +# Script extension only character +/^\p{Tangut}/utf + \x{2ff0} + 0: \x{2ff0} + +/^\p{sc=Tangut}/utf + \x{2ff0} +No match + +# Character not in script +/^\p{Tangut}/utf + \x{18d09} +No match + +# Base script check +/^\p{sc=Masaram_Gondi}/utf + \x{11d00} + 0: \x{11d00} + +/^\p{Script=Gonm}/utf + \x{11d59} + 0: \x{11d59} + +# Script extension check +/^\p{Masaram_Gondi}/utf + \x{964} + 0: \x{964} + +/^\p{scx=Gonm}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Masaram_Gondi}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Masaram_Gondi}/utf + \x{964} +No match + +# Character not in script +/^\p{Masaram_Gondi}/utf + \x{11d5a} +No match + +# Base script check +/^\p{sc=Dogra}/utf + \x{11800} + 0: \x{11800} + +/^\p{Script=Dogr}/utf + \x{1183b} + 0: \x{1183b} + +# Script extension check +/^\p{Dogra}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Dogr}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Dogra}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Dogra}/utf + \x{964} +No match + +# Character not in script +/^\p{Dogra}/utf + \x{1183c} +No match + +# Base script check +/^\p{sc=Gunjala_Gondi}/utf + \x{11d60} + 0: \x{11d60} + +/^\p{Script=Gong}/utf + \x{11da9} + 0: \x{11da9} + +# Script extension check +/^\p{Gunjala_Gondi}/utf + \x{b7} + 0: \x{b7} + +/^\p{scx=Gong}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Gunjala_Gondi}/utf + \x{b7} + 0: \x{b7} + +/^\p{sc=Gunjala_Gondi}/utf + \x{b7} +No match + +# Character not in script +/^\p{Gunjala_Gondi}/utf + \x{11daa} +No match + +# Base script check +/^\p{sc=Hanifi_Rohingya}/utf + \x{10d00} + 0: \x{10d00} + +/^\p{Script=Rohg}/utf + \x{10d39} + 0: \x{10d39} + +# Script extension check +/^\p{Hanifi_Rohingya}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Rohg}/utf + \x{6d4} + 0: \x{6d4} + +# Script extension only character +/^\p{Hanifi_Rohingya}/utf + \x{60c} + 0: \x{60c} + +/^\p{sc=Hanifi_Rohingya}/utf + 
\x{60c} +No match + +# Character not in script +/^\p{Hanifi_Rohingya}/utf + \x{10d3a} +No match + +# Base script check +/^\p{sc=Sogdian}/utf + \x{10f30} + 0: \x{10f30} + +/^\p{Script=Sogd}/utf + \x{10f59} + 0: \x{10f59} + +# Script extension check +/^\p{Sogdian}/utf + \x{640} + 0: \x{640} + +/^\p{scx=Sogd}/utf + \x{640} + 0: \x{640} + +# Script extension only character +/^\p{Sogdian}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Sogdian}/utf + \x{640} +No match + +# Character not in script +/^\p{Sogdian}/utf + \x{10f5a} +No match + +# Base script check +/^\p{sc=Nandinagari}/utf + \x{119a0} + 0: \x{119a0} + +/^\p{Script=Nand}/utf + \x{119e4} + 0: \x{119e4} + +# Script extension check +/^\p{Nandinagari}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Nand}/utf + \x{a835} + 0: \x{a835} + +# Script extension only character +/^\p{Nandinagari}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Nandinagari}/utf + \x{964} +No match + +# Character not in script +/^\p{Nandinagari}/utf + \x{119e5} +No match + +# Base script check +/^\p{sc=Yezidi}/utf + \x{10e80} + 0: \x{10e80} + +/^\p{Script=Yezi}/utf + \x{10eb1} + 0: \x{10eb1} + +# Script extension check +/^\p{Yezidi}/utf + \x{60c} + 0: \x{60c} + +/^\p{scx=Yezi}/utf + \x{669} + 0: \x{669} + +# Script extension only character +/^\p{Yezidi}/utf + \x{60c} + 0: \x{60c} + +/^\p{sc=Yezidi}/utf + \x{60c} +No match + +# Character not in script +/^\p{Yezidi}/utf + \x{10eb2} +No match + +# Base script check +/^\p{sc=Cypro_Minoan}/utf + \x{12f90} + 0: \x{12f90} + +/^\p{Script=Cpmn}/utf + \x{12ff2} + 0: \x{12ff2} + +# Script extension check +/^\p{Cypro_Minoan}/utf + \x{10100} + 0: \x{10100} + +/^\p{Script_Extensions=Cpmn}/utf + \x{10101} + 0: \x{10101} + +# Script extension only character +/^\p{Cypro_Minoan}/utf + \x{10100} + 0: \x{10100} + +/^\p{sc=Cypro_Minoan}/utf + \x{10100} +No match + +# Character not in script +/^\p{Cypro_Minoan}/utf + \x{12ff3} +No match + +# Base script check +/^\p{sc=Old_Uyghur}/utf + \x{10f70} + 0: \x{10f70} + 
+/^\p{Script=Ougr}/utf + \x{10f89} + 0: \x{10f89} + +# Script extension check +/^\p{Old_Uyghur}/utf + \x{640} + 0: \x{640} + +/^\p{scx=Ougr}/utf + \x{10af2} + 0: \x{10af2} + +# Script extension only character +/^\p{Old_Uyghur}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Old_Uyghur}/utf + \x{640} +No match + +# Character not in script +/^\p{Old_Uyghur}/utf + \x{10f8a} +No match + +# Base script check +/^\p{sc=Toto}/utf + \x{1e290} + 0: \x{1e290} + +/^\p{Script=Toto}/utf + \x{1e2ae} + 0: \x{1e2ae} + +# Script extension check +/^\p{Toto}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{Script_Extensions=Toto}/utf + \x{2bc} + 0: \x{2bc} + +# Script extension only character +/^\p{Toto}/utf + \x{2bc} + 0: \x{2bc} + +/^\p{sc=Toto}/utf + \x{2bc} +No match + +# Character not in script +/^\p{Toto}/utf + \x{1e2af} +No match + +# Base script check +/^\p{sc=Garay}/utf + \x{10d40} + 0: \x{10d40} + +/^\p{Script=Gara}/utf + \x{10d8f} + 0: \x{10d8f} + +# Script extension check +/^\p{Garay}/utf + \x{60c} + 0: \x{60c} + +/^\p{scx=Gara}/utf + \x{61f} + 0: \x{61f} + +# Script extension only character +/^\p{Garay}/utf + \x{60c} + 0: \x{60c} + +/^\p{sc=Garay}/utf + \x{60c} +No match + +# Character not in script +/^\p{Garay}/utf + \x{10d90} +No match + +# Base script check +/^\p{sc=Gurung_Khema}/utf + \x{16100} + 0: \x{16100} + +/^\p{Script=Gukh}/utf + \x{16139} + 0: \x{16139} + +# Script extension check +/^\p{Gurung_Khema}/utf + \x{965} + 0: \x{965} + +/^\p{Script_Extensions=Gukh}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Gurung_Khema}/utf + \x{965} + 0: \x{965} + +/^\p{sc=Gurung_Khema}/utf + \x{965} +No match + +# Character not in script +/^\p{Gurung_Khema}/utf + \x{1613a} +No match + +# Base script check +/^\p{sc=Ol_Onal}/utf + \x{1e5d0} + 0: \x{1e5d0} + +/^\p{Script=Onao}/utf + \x{1e5ff} + 0: \x{1e5ff} + +# Script extension check +/^\p{Ol_Onal}/utf + \x{964} + 0: \x{964} + +/^\p{scx=Onao}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Ol_Onal}/utf + 
\x{964} + 0: \x{964} + +/^\p{sc=Ol_Onal}/utf + \x{964} +No match + +# Character not in script +/^\p{Ol_Onal}/utf + \x{1e600} +No match + +# Base script check +/^\p{sc=Sunuwar}/utf + \x{11bc0} + 0: \x{11bc0} + +/^\p{Script=Sunu}/utf + \x{11bf9} + 0: \x{11bf9} + +# Script extension check +/^\p{Sunuwar}/utf + \x{300} + 0: \x{300} + +/^\p{Script_Extensions=Sunu}/utf + \x{331} + 0: \x{331} + +# Script extension only character +/^\p{Sunuwar}/utf + \x{300} + 0: \x{300} + +/^\p{sc=Sunuwar}/utf + \x{300} +No match + +# Character not in script +/^\p{Sunuwar}/utf + \x{11bfa} +No match + +# Base script check +/^\p{sc=Todhri}/utf + \x{105c0} + 0: \x{105c0} + +/^\p{Script=Todr}/utf + \x{105f3} + 0: \x{105f3} + +# Script extension check +/^\p{Todhri}/utf + \x{301} + 0: \x{301} + +/^\p{scx=Todr}/utf + \x{35e} + 0: \x{35e} + +# Script extension only character +/^\p{Todhri}/utf + \x{301} + 0: \x{301} + +/^\p{sc=Todhri}/utf + \x{301} +No match + +# Character not in script +/^\p{Todhri}/utf + \x{105f4} +No match + +# Base script check +/^\p{sc=Tulu_Tigalari}/utf + \x{11380} + 0: \x{11380} + +/^\p{Script=Tutg}/utf + \x{113e2} + 0: \x{113e2} + +# Script extension check +/^\p{Tulu_Tigalari}/utf + \x{ce6} + 0: \x{ce6} + +/^\p{Script_Extensions=Tutg}/utf + \x{a8f1} + 0: \x{a8f1} + +# Script extension only character +/^\p{Tulu_Tigalari}/utf + \x{ce6} + 0: \x{ce6} + +/^\p{sc=Tulu_Tigalari}/utf + \x{ce6} +No match + +# Character not in script +/^\p{Tulu_Tigalari}/utf + \x{113e3} +No match + +# Base script check +/^\p{sc=Common}/utf + \x{00} + 0: \x{00} + +/^\p{Script=Zyyy}/utf + \x{e007f} + 0: \x{e007f} + +# Character not in script +/^\p{Common}/utf + \x{e0080} +No match + +# Base script check +/^\p{sc=Lao}/utf + \x{e81} + 0: \x{e81} + +/^\p{Script=Laoo}/utf + \x{edf} + 0: \x{edf} + +# Character not in script +/^\p{Lao}/utf + \x{ee0} +No match + +# Base script check +/^\p{sc=Canadian_Aboriginal}/utf + \x{1400} + 0: \x{1400} + +/^\p{Script=Cans}/utf + \x{11abf} + 0: \x{11abf} + +# Character 
not in script +/^\p{Canadian_Aboriginal}/utf + \x{11ac0} +No match + +# Base script check +/^\p{sc=Ogham}/utf + \x{1680} + 0: \x{1680} + +/^\p{Script=Ogam}/utf + \x{169c} + 0: \x{169c} + +# Character not in script +/^\p{Ogham}/utf + \x{169d} +No match + +# Base script check +/^\p{sc=Khmer}/utf + \x{1780} + 0: \x{1780} + +/^\p{Script=Khmr}/utf + \x{19ff} + 0: \x{19ff} + +# Character not in script +/^\p{Khmer}/utf + \x{1a00} +No match + +# Base script check +/^\p{sc=Old_Italic}/utf + \x{10300} + 0: \x{10300} + +/^\p{Script=Ital}/utf + \x{1032f} + 0: \x{1032f} + +# Character not in script +/^\p{Old_Italic}/utf + \x{10330} +No match + +# Base script check +/^\p{sc=Deseret}/utf + \x{10400} + 0: \x{10400} + +/^\p{Script=Dsrt}/utf + \x{1044f} + 0: \x{1044f} + +# Character not in script +/^\p{Deseret}/utf + \x{10450} +No match + +# Base script check +/^\p{sc=Inherited}/utf + \x{300} + 0: \x{300} + +/^\p{Script=Zinh}/utf + \x{e01ef} + 0: \x{e01ef} + +# Character not in script +/^\p{Inherited}/utf + \x{e01f0} +No match + +# Base script check +/^\p{sc=Ugaritic}/utf + \x{10380} + 0: \x{10380} + +/^\p{Script=Ugar}/utf + \x{1039f} + 0: \x{1039f} + +# Character not in script +/^\p{Ugaritic}/utf + \x{103a0} +No match + +# Base script check +/^\p{sc=Osmanya}/utf + \x{10480} + 0: \x{10480} + +/^\p{Script=Osma}/utf + \x{104a9} + 0: \x{104a9} + +# Character not in script +/^\p{Osmanya}/utf + \x{104aa} +No match + +# Base script check +/^\p{sc=Braille}/utf + \x{2800} + 0: \x{2800} + +/^\p{Script=Brai}/utf + \x{28ff} + 0: \x{28ff} + +# Character not in script +/^\p{Braille}/utf + \x{2900} +No match + +# Base script check +/^\p{sc=New_Tai_Lue}/utf + \x{1980} + 0: \x{1980} + +/^\p{Script=Talu}/utf + \x{19df} + 0: \x{19df} + +# Character not in script +/^\p{New_Tai_Lue}/utf + \x{19e0} +No match + +# Base script check +/^\p{sc=Old_Persian}/utf + \x{103a0} + 0: \x{103a0} + +/^\p{Script=Xpeo}/utf + \x{103d5} + 0: \x{103d5} + +# Character not in script +/^\p{Old_Persian}/utf + \x{103d6} +No 
match + +# Base script check +/^\p{sc=Kharoshthi}/utf + \x{10a00} + 0: \x{10a00} + +/^\p{Script=Khar}/utf + \x{10a58} + 0: \x{10a58} + +# Character not in script +/^\p{Kharoshthi}/utf + \x{10a59} +No match + +# Base script check +/^\p{sc=Balinese}/utf + \x{1b00} + 0: \x{1b00} + +/^\p{Script=Bali}/utf + \x{1b7f} + 0: \x{1b7f} + +# Character not in script +/^\p{Balinese}/utf + \x{1b80} +No match + +# Base script check +/^\p{sc=Cuneiform}/utf + \x{12000} + 0: \x{12000} + +/^\p{Script=Xsux}/utf + \x{12543} + 0: \x{12543} + +# Character not in script +/^\p{Cuneiform}/utf + \x{12544} +No match + +# Base script check +/^\p{sc=Phoenician}/utf + \x{10900} + 0: \x{10900} + +/^\p{Script=Phnx}/utf + \x{1091f} + 0: \x{1091f} + +# Character not in script +/^\p{Phoenician}/utf + \x{10920} +No match + +# Base script check +/^\p{sc=Sundanese}/utf + \x{1b80} + 0: \x{1b80} + +/^\p{Script=Sund}/utf + \x{1cc7} + 0: \x{1cc7} + +# Character not in script +/^\p{Sundanese}/utf + \x{1cc8} +No match + +# Base script check +/^\p{sc=Lepcha}/utf + \x{1c00} + 0: \x{1c00} + +/^\p{Script=Lepc}/utf + \x{1c4f} + 0: \x{1c4f} + +# Character not in script +/^\p{Lepcha}/utf + \x{1c50} +No match + +# Base script check +/^\p{sc=Ol_Chiki}/utf + \x{1c50} + 0: \x{1c50} + +/^\p{Script=Olck}/utf + \x{1c7f} + 0: \x{1c7f} + +# Character not in script +/^\p{Ol_Chiki}/utf + \x{1c80} +No match + +# Base script check +/^\p{sc=Vai}/utf + \x{a500} + 0: \x{a500} + +/^\p{Script=Vaii}/utf + \x{a62b} + 0: \x{a62b} + +# Character not in script +/^\p{Vai}/utf + \x{a62c} +No match + +# Base script check +/^\p{sc=Saurashtra}/utf + \x{a880} + 0: \x{a880} + +/^\p{Script=Saur}/utf + \x{a8d9} + 0: \x{a8d9} + +# Character not in script +/^\p{Saurashtra}/utf + \x{a8da} +No match + +# Base script check +/^\p{sc=Rejang}/utf + \x{a930} + 0: \x{a930} + +/^\p{Script=Rjng}/utf + \x{a95f} + 0: \x{a95f} + +# Character not in script +/^\p{Rejang}/utf + \x{a960} +No match + +# Base script check +/^\p{sc=Cham}/utf + \x{aa00} + 0: \x{aa00} + 
+/^\p{Script=Cham}/utf + \x{aa5f} + 0: \x{aa5f} + +# Character not in script +/^\p{Cham}/utf + \x{aa60} +No match + +# Base script check +/^\p{sc=Tai_Tham}/utf + \x{1a20} + 0: \x{1a20} + +/^\p{Script=Lana}/utf + \x{1aad} + 0: \x{1aad} + +# Character not in script +/^\p{Tai_Tham}/utf + \x{1aae} +No match + +# Base script check +/^\p{sc=Tai_Viet}/utf + \x{aa80} + 0: \x{aa80} + +/^\p{Script=Tavt}/utf + \x{aadf} + 0: \x{aadf} + +# Character not in script +/^\p{Tai_Viet}/utf + \x{aae0} +No match + +# Base script check +/^\p{sc=Egyptian_Hieroglyphs}/utf + \x{13000} + 0: \x{13000} + +/^\p{Script=Egyp}/utf + \x{143fa} + 0: \x{143fa} + +# Character not in script +/^\p{Egyptian_Hieroglyphs}/utf + \x{143fb} +No match + +# Base script check +/^\p{sc=Bamum}/utf + \x{a6a0} + 0: \x{a6a0} + +/^\p{Script=Bamu}/utf + \x{16a38} + 0: \x{16a38} + +# Character not in script +/^\p{Bamum}/utf + \x{16a39} +No match + +# Base script check +/^\p{sc=Meetei_Mayek}/utf + \x{aae0} + 0: \x{aae0} + +/^\p{Script=Mtei}/utf + \x{abf9} + 0: \x{abf9} + +# Character not in script +/^\p{Meetei_Mayek}/utf + \x{abfa} +No match + +# Base script check +/^\p{sc=Imperial_Aramaic}/utf + \x{10840} + 0: \x{10840} + +/^\p{Script=Armi}/utf + \x{1085f} + 0: \x{1085f} + +# Character not in script +/^\p{Imperial_Aramaic}/utf + \x{10860} +No match + +# Base script check +/^\p{sc=Old_South_Arabian}/utf + \x{10a60} + 0: \x{10a60} + +/^\p{Script=Sarb}/utf + \x{10a7f} + 0: \x{10a7f} + +# Character not in script +/^\p{Old_South_Arabian}/utf + \x{10a80} +No match + +# Base script check +/^\p{sc=Inscriptional_Parthian}/utf + \x{10b40} + 0: \x{10b40} + +/^\p{Script=Prti}/utf + \x{10b5f} + 0: \x{10b5f} + +# Character not in script +/^\p{Inscriptional_Parthian}/utf + \x{10b60} +No match + +# Base script check +/^\p{sc=Inscriptional_Pahlavi}/utf + \x{10b60} + 0: \x{10b60} + +/^\p{Script=Phli}/utf + \x{10b7f} + 0: \x{10b7f} + +# Character not in script +/^\p{Inscriptional_Pahlavi}/utf + \x{10b80} +No match + +# Base script check 
+/^\p{sc=Batak}/utf + \x{1bc0} + 0: \x{1bc0} + +/^\p{Script=Batk}/utf + \x{1bff} + 0: \x{1bff} + +# Character not in script +/^\p{Batak}/utf + \x{1c00} +No match + +# Base script check +/^\p{sc=Brahmi}/utf + \x{11000} + 0: \x{11000} + +/^\p{Script=Brah}/utf + \x{1107f} + 0: \x{1107f} + +# Character not in script +/^\p{Brahmi}/utf + \x{11080} +No match + +# Base script check +/^\p{sc=Meroitic_Cursive}/utf + \x{109a0} + 0: \x{109a0} + +/^\p{Script=Merc}/utf + \x{109ff} + 0: \x{109ff} + +# Character not in script +/^\p{Meroitic_Cursive}/utf + \x{10a00} +No match + +# Base script check +/^\p{sc=Miao}/utf + \x{16f00} + 0: \x{16f00} + +/^\p{Script=Plrd}/utf + \x{16f9f} + 0: \x{16f9f} + +# Character not in script +/^\p{Miao}/utf + \x{16fa0} +No match + +# Base script check +/^\p{sc=Sora_Sompeng}/utf + \x{110d0} + 0: \x{110d0} + +/^\p{Script=Sora}/utf + \x{110f9} + 0: \x{110f9} + +# Character not in script +/^\p{Sora_Sompeng}/utf + \x{110fa} +No match + +# Base script check +/^\p{sc=Bassa_Vah}/utf + \x{16ad0} + 0: \x{16ad0} + +/^\p{Script=Bass}/utf + \x{16af5} + 0: \x{16af5} + +# Character not in script +/^\p{Bassa_Vah}/utf + \x{16af6} +No match + +# Base script check +/^\p{sc=Pahawh_Hmong}/utf + \x{16b00} + 0: \x{16b00} + +/^\p{Script=Hmng}/utf + \x{16b8f} + 0: \x{16b8f} + +# Character not in script +/^\p{Pahawh_Hmong}/utf + \x{16b90} +No match + +# Base script check +/^\p{sc=Mende_Kikakui}/utf + \x{1e800} + 0: \x{1e800} + +/^\p{Script=Mend}/utf + \x{1e8d6} + 0: \x{1e8d6} + +# Character not in script +/^\p{Mende_Kikakui}/utf + \x{1e8d7} +No match + +# Base script check +/^\p{sc=Mro}/utf + \x{16a40} + 0: \x{16a40} + +/^\p{Script=Mroo}/utf + \x{16a6f} + 0: \x{16a6f} + +# Character not in script +/^\p{Mro}/utf + \x{16a70} +No match + +# Base script check +/^\p{sc=Old_North_Arabian}/utf + \x{10a80} + 0: \x{10a80} + +/^\p{Script=Narb}/utf + \x{10a9f} + 0: \x{10a9f} + +# Character not in script +/^\p{Old_North_Arabian}/utf + \x{10aa0} +No match + +# Base script check 
+/^\p{sc=Nabataean}/utf + \x{10880} + 0: \x{10880} + +/^\p{Script=Nbat}/utf + \x{108af} + 0: \x{108af} + +# Character not in script +/^\p{Nabataean}/utf + \x{108b0} +No match + +# Base script check +/^\p{sc=Palmyrene}/utf + \x{10860} + 0: \x{10860} + +/^\p{Script=Palm}/utf + \x{1087f} + 0: \x{1087f} + +# Character not in script +/^\p{Palmyrene}/utf + \x{10880} +No match + +# Base script check +/^\p{sc=Pau_Cin_Hau}/utf + \x{11ac0} + 0: \x{11ac0} + +/^\p{Script=Pauc}/utf + \x{11af8} + 0: \x{11af8} + +# Character not in script +/^\p{Pau_Cin_Hau}/utf + \x{11af9} +No match + +# Base script check +/^\p{sc=Siddham}/utf + \x{11580} + 0: \x{11580} + +/^\p{Script=Sidd}/utf + \x{115dd} + 0: \x{115dd} + +# Character not in script +/^\p{Siddham}/utf + \x{115de} +No match + +# Base script check +/^\p{sc=Warang_Citi}/utf + \x{118a0} + 0: \x{118a0} + +/^\p{Script=Wara}/utf + \x{118ff} + 0: \x{118ff} + +# Character not in script +/^\p{Warang_Citi}/utf + \x{11900} +No match + +# Base script check +/^\p{sc=Ahom}/utf + \x{11700} + 0: \x{11700} + +/^\p{Script=Ahom}/utf + \x{11746} + 0: \x{11746} + +# Character not in script +/^\p{Ahom}/utf + \x{11747} +No match + +# Base script check +/^\p{sc=Anatolian_Hieroglyphs}/utf + \x{14400} + 0: \x{14400} + +/^\p{Script=Hluw}/utf + \x{14646} + 0: \x{14646} + +# Character not in script +/^\p{Anatolian_Hieroglyphs}/utf + \x{14647} +No match + +# Base script check +/^\p{sc=Hatran}/utf + \x{108e0} + 0: \x{108e0} + +/^\p{Script=Hatr}/utf + \x{108ff} + 0: \x{108ff} + +# Character not in script +/^\p{Hatran}/utf + \x{10900} +No match + +# Base script check +/^\p{sc=SignWriting}/utf + \x{1d800} + 0: \x{1d800} + +/^\p{Script=Sgnw}/utf + \x{1daaf} + 0: \x{1daaf} + +# Character not in script +/^\p{SignWriting}/utf + \x{1dab0} +No match + +# Base script check +/^\p{sc=Bhaiksuki}/utf + \x{11c00} + 0: \x{11c00} + +/^\p{Script=Bhks}/utf + \x{11c6c} + 0: \x{11c6c} + +# Character not in script +/^\p{Bhaiksuki}/utf + \x{11c6d} +No match + +# Base script check 
+/^\p{sc=Marchen}/utf + \x{11c70} + 0: \x{11c70} + +/^\p{Script=Marc}/utf + \x{11cb6} + 0: \x{11cb6} + +# Character not in script +/^\p{Marchen}/utf + \x{11cb7} +No match + +# Base script check +/^\p{sc=Newa}/utf + \x{11400} + 0: \x{11400} + +/^\p{Script=Newa}/utf + \x{11461} + 0: \x{11461} + +# Character not in script +/^\p{Newa}/utf + \x{11462} +No match + +# Base script check +/^\p{sc=Nushu}/utf + \x{16fe1} + 0: \x{16fe1} + +/^\p{Script=Nshu}/utf + \x{1b2fb} + 0: \x{1b2fb} + +# Character not in script +/^\p{Nushu}/utf + \x{1b2fc} +No match + +# Base script check +/^\p{sc=Soyombo}/utf + \x{11a50} + 0: \x{11a50} + +/^\p{Script=Soyo}/utf + \x{11aa2} + 0: \x{11aa2} + +# Character not in script +/^\p{Soyombo}/utf + \x{11aa3} +No match + +# Base script check +/^\p{sc=Zanabazar_Square}/utf + \x{11a00} + 0: \x{11a00} + +/^\p{Script=Zanb}/utf + \x{11a47} + 0: \x{11a47} + +# Character not in script +/^\p{Zanabazar_Square}/utf + \x{11a48} +No match + +# Base script check +/^\p{sc=Makasar}/utf + \x{11ee0} + 0: \x{11ee0} + +/^\p{Script=Maka}/utf + \x{11ef8} + 0: \x{11ef8} + +# Character not in script +/^\p{Makasar}/utf + \x{11ef9} +No match + +# Base script check +/^\p{sc=Medefaidrin}/utf + \x{16e40} + 0: \x{16e40} + +/^\p{Script=Medf}/utf + \x{16e9a} + 0: \x{16e9a} + +# Character not in script +/^\p{Medefaidrin}/utf + \x{16e9b} +No match + +# Base script check +/^\p{sc=Old_Sogdian}/utf + \x{10f00} + 0: \x{10f00} + +/^\p{Script=Sogo}/utf + \x{10f27} + 0: \x{10f27} + +# Character not in script +/^\p{Old_Sogdian}/utf + \x{10f28} +No match + +# Base script check +/^\p{sc=Elymaic}/utf + \x{10fe0} + 0: \x{10fe0} + +/^\p{Script=Elym}/utf + \x{10ff6} + 0: \x{10ff6} + +# Character not in script +/^\p{Elymaic}/utf + \x{10ff7} +No match + +# Base script check +/^\p{sc=Nyiakeng_Puachue_Hmong}/utf + \x{1e100} + 0: \x{1e100} + +/^\p{Script=Hmnp}/utf + \x{1e14f} + 0: \x{1e14f} + +# Character not in script +/^\p{Nyiakeng_Puachue_Hmong}/utf + \x{1e150} +No match + +# Base script check 
+/^\p{sc=Wancho}/utf + \x{1e2c0} + 0: \x{1e2c0} + +/^\p{Script=Wcho}/utf + \x{1e2ff} + 0: \x{1e2ff} + +# Character not in script +/^\p{Wancho}/utf + \x{1e300} +No match + +# Base script check +/^\p{sc=Chorasmian}/utf + \x{10fb0} + 0: \x{10fb0} + +/^\p{Script=Chrs}/utf + \x{10fcb} + 0: \x{10fcb} + +# Character not in script +/^\p{Chorasmian}/utf + \x{10fcc} +No match + +# Base script check +/^\p{sc=Dives_Akuru}/utf + \x{11900} + 0: \x{11900} + +/^\p{Script=Diak}/utf + \x{11959} + 0: \x{11959} + +# Character not in script +/^\p{Dives_Akuru}/utf + \x{1195a} +No match + +# Base script check +/^\p{sc=Khitan_Small_Script}/utf + \x{16fe4} + 0: \x{16fe4} + +/^\p{Script=Kits}/utf + \x{18cff} + 0: \x{18cff} + +# Character not in script +/^\p{Khitan_Small_Script}/utf + \x{18d00} +No match + +# Base script check +/^\p{sc=Tangsa}/utf + \x{16a70} + 0: \x{16a70} + +/^\p{Script=Tnsa}/utf + \x{16ac9} + 0: \x{16ac9} + +# Character not in script +/^\p{Tangsa}/utf + \x{16aca} +No match + +# Base script check +/^\p{sc=Vithkuqi}/utf + \x{10570} + 0: \x{10570} + +/^\p{Script=Vith}/utf + \x{105bc} + 0: \x{105bc} + +# Character not in script +/^\p{Vithkuqi}/utf + \x{105bd} +No match + +# Base script check +/^\p{sc=Kawi}/utf + \x{11f00} + 0: \x{11f00} + +/^\p{Script=Kawi}/utf + \x{11f5a} + 0: \x{11f5a} + +# Character not in script +/^\p{Kawi}/utf + \x{11f5b} +No match + +# Base script check +/^\p{sc=Nag_Mundari}/utf + \x{1e4d0} + 0: \x{1e4d0} + +/^\p{Script=Nagm}/utf + \x{1e4f9} + 0: \x{1e4f9} + +# Character not in script +/^\p{Nag_Mundari}/utf + \x{1e4fa} +No match + +# Base script check +/^\p{sc=Kirat_Rai}/utf + \x{16d40} + 0: \x{16d40} + +/^\p{Script=Krai}/utf + \x{16d79} + 0: \x{16d79} + +# Character not in script +/^\p{Kirat_Rai}/utf + \x{16d7a} +No match + +# End of test diff --git a/testdata/testoutput3 b/testdata/testoutput3 index 717fa45..1fd9d97 100644 --- a/testdata/testoutput3 +++ b/testdata/testoutput3 @@ -1,5 +1,5 @@ # This set of tests checks local-specific features, using 
the "fr_FR" locale. -# It is not Perl-compatible. When run via RunTest, the locale is edited to +# It is almost Perl-compatible. When run via RunTest, the locale is edited to # be whichever of "fr_FR", "french", or "fr" is found to exist. There is # different version of this file called wintestinput3 for use on Windows, # where the locale is called "french" and the tests are run using @@ -16,11 +16,6 @@ No match École 0: École -/^[\w]+/ -\= Expect no match - École -No match - /^[\W]+/ École 0: \xc9 @@ -79,16 +74,16 @@ No match /\w/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /\w/I,locale=fr_FR Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z - ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â - ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â + ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 1 # All remaining tests are in the fr_FR locale, so set the default. 
@@ -115,6 +110,18 @@ No match \x9c No match +/ÿ/i + \xff + 0: ÿ +\= Expect no match + y +No match + +/(.)\1/i + \xfe\xde + 0: þÞ + 1: þ + /\W+/ >>>\xaa<<< 0: >>> @@ -161,10 +168,10 @@ No match End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç - È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í - î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í + î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 3 # End of testinput3 diff --git a/testdata/testoutput3A b/testdata/testoutput3A index 53f6894..5e7e120 100644 --- a/testdata/testoutput3A +++ b/testdata/testoutput3A @@ -1,5 +1,5 @@ # This set of tests checks local-specific features, using the "fr_FR" locale. -# It is not Perl-compatible. When run via RunTest, the locale is edited to +# It is almost Perl-compatible. When run via RunTest, the locale is edited to # be whichever of "fr_FR", "french", or "fr" is found to exist. 
There is # different version of this file called wintestinput3 for use on Windows, # where the locale is called "french" and the tests are run using @@ -16,11 +16,6 @@ No match École 0: École -/^[\w]+/ -\= Expect no match - École -No match - /^[\W]+/ École 0: \xc9 @@ -79,16 +74,16 @@ No match /\w/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /\w/I,locale=fr_FR Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z - ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â - ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â + ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 1 # All remaining tests are in the fr_FR locale, so set the default. 
@@ -115,6 +110,18 @@ No match \x9c No match +/ÿ/i + \xff + 0: ÿ +\= Expect no match + y +No match + +/(.)\1/i + \xfe\xde + 0: þÞ + 1: þ + /\W+/ >>>\xaa<<< 0: >>> @@ -161,10 +168,10 @@ No match End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç - È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í - î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í + î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 3 # End of testinput3 diff --git a/testdata/testoutput3B b/testdata/testoutput3B index 1e22efe..0dcbee6 100644 --- a/testdata/testoutput3B +++ b/testdata/testoutput3B @@ -1,5 +1,5 @@ # This set of tests checks local-specific features, using the "fr_FR" locale. -# It is not Perl-compatible. When run via RunTest, the locale is edited to +# It is almost Perl-compatible. When run via RunTest, the locale is edited to # be whichever of "fr_FR", "french", or "fr" is found to exist. 
There is # different version of this file called wintestinput3 for use on Windows, # where the locale is called "french" and the tests are run using @@ -16,11 +16,6 @@ No match École 0: École -/^[\w]+/ -\= Expect no match - École -No match - /^[\W]+/ École 0: \xc9 @@ -79,16 +74,16 @@ No match /\w/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /\w/I,locale=fr_FR Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z - ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â - ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â + ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 1 # All remaining tests are in the fr_FR locale, so set the default. 
@@ -115,6 +110,18 @@ No match \x9c No match +/ÿ/i + \xff + 0: ÿ +\= Expect no match + y +No match + +/(.)\1/i + \xfe\xde + 0: þÞ + 1: þ + /\W+/ >>>\xaa<<< 0: >>> @@ -161,10 +168,10 @@ No match End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç - È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í - î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í + î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 3 # End of testinput3 diff --git a/testdata/testoutput4 b/testdata/testoutput4 index 5917ebb..b68ad29 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -1648,6 +1648,19 @@ No match \x{660}\x{661}\x{662}ABC No match +/^\pN{3,}+(.)/utf + \x{7c0}8\x{662}\x{966}\x{95c} + 0: \x{7c0}8\x{662}\x{966}\x{95c} + 1: \x{95c} + \x{7c0}8\x{662}\x{95c} + 0: \x{7c0}8\x{662}\x{95c} + 1: \x{95c} +\= Expect no match + \x{7c0}8\x{662}\x{966} +No match + \x{7c0}8\x{95c} +No match + /(?<=A\p{Nd})XYZ/utf A2XYZ 0: XYZ @@ -1866,6 +1879,19 @@ No match 0: A\x{300}\x{301}B\x{300}C 1: C +/^\X{3,}+/utf + A\x{300}B\x{301}U\x{303}\x{0301} + 0: A\x{300}B\x{301}U\x{303}\x{301} + A\x{300}B\x{301}U\x{303}\x{0301}X + 0: A\x{300}B\x{301}U\x{303}\x{301}X +\= Expect no match + A\x{300} +No match + A\x{300}B\x{301} +No match + A\x{300}U\x{303}\x{0301} +No match + /^\X/utf A 0: A @@ -3009,6 +3035,45 @@ No match \x{0053}\x{0073}\x{017f} 0: Ss\x{17f} +/^[a-z\x{500}-\x{1000}]{3,}[a-h]|x/utf + ab\x{600}ijklmh + 0: ab\x{600}ijklmh + ab\x{600}hijklm + 0: ab\x{600}h +\= Expect no match + ab\x{600}ijklm +No match + +/^[a-z\x{500}-\x{1000}]{4,7}[a-h]|x/utf + 
ab\x{600}\x{700}ijkh + 0: ab\x{600}\x{700}ijkh + ab\x{600}\x{700}hijkl + 0: ab\x{600}\x{700}h +\= Expect no match + ab\x{600}\x{700}ijklh +No match + ab\x{600}h\x{700}ijklmh +No match + +/([a-z\x{1000}\x{2000}]{1,2}?u)+$/utf + \x{1000}uu\x{2000}u + 0: \x{1000}uu\x{2000}u + 1: u\x{2000}u + \x{1001}uuuu + 0: uuuu + 1: uu + \x{2001}uuuuu + 0: uuuuu + 1: uuu + uuuu\x{1fff}#u#\x{2000}\x{1000}u\x{2000}u + 0: \x{2000}\x{1000}u\x{2000}u + 1: \x{2000}u +\= Expect no match + abuabuabuabu! +No match + uuuuuuuuuuuu# +No match + # -------------------------------------- /(ΣΆΜΟΣ) \1/i,utf @@ -3783,6 +3848,11 @@ No match \x{1234} 0: \x{1234} +/(\x{1234}) \1/utf + \N{U+1234} \o{11064} + 0: \x{1234} \x{1234} + 1: \x{1234} + # Test the full list of Unicode "Pattern White Space" characters that are to # be ignored by /x. The pattern lines below may show up oddly in text editors # or when listed to the screen. Note that characters such as U+2002, which are @@ -4602,4 +4672,320 @@ No match --cafe\x{300}_au\x{203f}lait! 0: cafe +# -------------------------------------------------------------------------- +# Case-independent matching property tests added after changing PCRE2 to be +# compatible with Perl. All three cases (upper, lower, title) conflate. + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/utf + >AbbD< + 0: AbbD + >Abb\x{01c5}< + 0: Abb\x{1c5} +\= Expect no match + >aBBd< +No match + >aB!!< +No match + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf + >aB!!< + 0: aB!! + >\x{01c5}B!!< + 0: \x{1c5}B!! +\= Expect no match + >AbbD< +No match + >aBBd< +No match + >Abb\x{01c5}< +No match + +/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf + >aB!!< + 0: aB!! +\= Expect no match + >AbbD< +No match + >aBBd< +No match + >Abb\x{01c5}< +No match + +/[\p{Lt}\x{36b}][\P{Lt}\x{10a0}]/i,utf + >A!< + 0: A! 
+ >\x{3c9}\x{58d}< + 0: \x{3c9}\x{58d} + >\x{413}\x{940}< + 0: \x{413}\x{940} +\= Expect no match + \x{3c9}\x{3c9} +No match + \x{58d}\x{58d} +No match + \x{413}\x{413} +No match + \x{940}\x{940} +No match + +/^\p{Lt}+/i,utf + \x{1c5}AB + 0: \x{1c5}AB + +# -------------------------------------------------------------------------- + +/\p{ ^ L u }/ + AbCd + 0: b + +# hex + +/c3 b1/hex,utf + \N{U+00F1} + 0: \x{f1} + +/[^\P{Lu}1]/i,utf,ucp + a + 0: a + A + 0: A + \x{3a3} + 0: \x{3a3} + \x{3c3} + 0: \x{3c3} +\= Expect no match + 1 +No match + 2 +No match + +/[^\P{Lu}1]/utf,ucp + A + 0: A + \x{3a3} + 0: \x{3a3} +\= Expect no match + 1 +No match + 2 +No match + a +No match + \x{3c3} +No match + +/[\P{Lu}1]/i,utf,ucp + 1 + 0: 1 + 2 + 0: 2 +\= Expect no match + a +No match + A +No match + \x{3a3} +No match + \x{3c3} +No match + +/[\P{Lu}1]/utf,ucp + 1 + 0: 1 + 2 + 0: 2 + a + 0: a + \x{3c3} + 0: \x{3c3} +\= Expect no match + A +No match + \x{3a3} +No match + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +/(?[\p{L} - \p{Lu}])/ + a + 0: a +\= Expect no match + A +No match + 1 +No match + +/(?[\p{L} & \p{Lu}])/ + A + 0: A +\= Expect no match + a +No match + 1 +No match + +/(?[[\p{Lu}z] ^ [\p{Ll}G]])/ + A + 0: A + p + 0: p +\= Expect no match + G +No match + z +No match + 1 +No match + +/(?[\p{Ll} | \p{Nd}])/ + a + 0: a + 1 + 0: 1 +\= Expect no match + A +No match + +/(?[\p{Ll} + [\p{Nd}]])/ + a + 0: a + 1 + 0: 1 +\= Expect no match + A +No match + +/(?[ ![\p{Nd}z] ])/ + _ + 0: _ + Z + 0: Z +\= Expect no match + 1 +No match + z +No match + +/(?[ \P{Nd} + [2] ])/ + _ + 0: _ + Z + 0: Z + 2 + 0: 2 +\= Expect no match + 1 +No match + 3 +No match + +/(?[ ![\P{Nd}] ])/ + 1 + 0: 1 + 2 + 0: 2 +\= Expect no match + _ +No match + z +No match + +# caseless tests + +/(?[ \p{Lu} ^ \p{Ll} ])/ + a + 0: a + A + 0: A +\= Expect no match + _ +No match + 1 +No match + +/(?[ [\p{Lu}1] ^ \p{Ll} ])/i + 1 + 0: 1 +\= Expect no match + a +No match + A +No match + _ +No match + +/(?[ 
[\p{Lu}1] & [\p{Ll}1] ])/ + 1 + 0: 1 +\= Expect no match + a +No match + A +No match + _ +No match + 2 +No match + +/(?[ [\p{Lu}1] & [\p{Ll}1] ])/i + a + 0: a + A + 0: A + 1 + 0: 1 +\= Expect no match + _ +No match + 2 +No match + +/(?[ \p{Lu} + \p{Ll} & [a-z] ])/utf + \x{0411} + 0: \x{411} + a + 0: a + A + 0: A +\= Expect no match + \x{0431} +No match + +/(?[ (\p{Lu} + \p{Ll}) & [a-z] ])/utf + a + 0: a +\= Expect no match + \x{0411} +No match + \x{0431} +No match + A +No match + +/(?[ [a-z] & \p{Lu} + \p{Ll} ])/utf + a + 0: a + \x{0431} + 0: \x{431} +\= Expect no match + \x{0411} +No match + A +No match + +/(?[ [a-z] & (\p{Lu} + \p{Ll}) ])/utf + a + 0: a +\= Expect no match + \x{0431} +No match + \x{0411} +No match + A +No match + +# -------------- + # End of testinput4 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index b79959b..73d484c 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -98,6 +98,30 @@ No match # --------------------------------------------------------------------- +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. 
+/\65535/IB,utf,no_start_optimize +------------------------------------------------------------------ + Bra + \x{1ad}35 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_start_optimize utf +Optimizations: auto_possess,dotstar_anchor + +/\65536/IB,utf,no_start_optimize +------------------------------------------------------------------ + Bra + \x{1ad}36 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_start_optimize utf +Optimizations: auto_possess,dotstar_anchor + /\x{110000}/IB,utf Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large @@ -196,7 +220,7 @@ Subject length lower bound = 3 Capture group count = 0 Compile options: utf Overall options: anchored utf -Starting code units: a b +Starting code units: a b Subject length lower bound = 1 bar 0: b @@ -300,7 +324,7 @@ Subject length lower bound = 1 /[^\xFF]/IB ------------------------------------------------------------------ Bra - [^\x{ff}] + [^\x{ff}] (not) Ket End ------------------------------------------------------------------ @@ -334,7 +358,7 @@ Subject length lower bound = 1 /[^\x{100}]abc(xyz(?1))/IB,utf ------------------------------------------------------------------ Bra - [^\x{100}] + [^\x{100}] (not) abc CBra 1 xyz @@ -474,6 +498,7 @@ Subject length lower bound = 0 Capture group count = 0 Compile options: no_start_optimize utf Overall options: anchored no_start_optimize utf +Optimizations: auto_possess,dotstar_anchor /()()()()()()()()()() ()()()()()()()()()() @@ -795,7 +820,10 @@ No match No match /[[:a\x{100}b:]]/utf -Failed: error 130 at offset 3: unknown POSIX class name +Failed: error 130 at offset 14: unknown POSIX class name + +/[\p{InvalidOrBadProperty}]/ +Failed: error 147 at offset 25: unknown property after \P or \p /a[^]b/utf,allow_empty_class,match_unset_backref a\x{1234}b @@ -1481,7 +1509,7 @@ Failed: error 173 at offset 
7: disallowed Unicode code point (>= 0xd800 && <= 0x /[\H\x{d7ff}]+/B,utf ------------------------------------------------------------------ Bra - [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++ + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]++ Ket End ------------------------------------------------------------------ @@ -1521,7 +1549,7 @@ Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0x /[\V\x{d7ff}]+/B,utf ------------------------------------------------------------------ Bra - [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++ + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}]++ Ket End ------------------------------------------------------------------ @@ -1648,11 +1676,11 @@ Partial match: \x{0d}\x{0d} /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf ------------------------------------------------------------------ Bra - [^\x{100}] - [^\x{1234}] - [^\x{ffff}] - [^\x{10000}] - [^\x{10ffff}] + [^\x{100}] (not) + [^\x{1234}] (not) + [^\x{ffff}] (not) + [^\x{10000}] (not) + [^\x{10ffff}] (not) Ket End ------------------------------------------------------------------ @@ -1660,11 +1688,11 @@ Partial match: \x{0d}\x{0d} /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf ------------------------------------------------------------------ Bra - /i [^\x{100}] - /i [^\x{1234}] - /i [^\x{ffff}] - /i [^\x{10000}] - /i [^\x{10ffff}] + /i [^\x{100}] (not) + /i [^\x{1234}] (not) + /i [^\x{ffff}] (not) + /i [^\x{10000}] (not) + /i [^\x{10ffff}] (not) Ket End ------------------------------------------------------------------ @@ -1672,15 +1700,15 @@ Partial match: \x{0d}\x{0d} 
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf ------------------------------------------------------------------ Bra - [^\x{100}]* - [^\x{10000}]+ - [^\x{10ffff}]?? - [^\x{8000}]{4} - [^\x{8000}]* - [^\x{7fff}]{2} - [^\x{7fff}]{0,7}? - [^\x{fffff}]{5} - [^\x{fffff}]?+ + [^\x{100}]* (not) + [^\x{10000}]+ (not) + [^\x{10ffff}]?? (not) + [^\x{8000}]{4} (not) + [^\x{8000}]* (not) + [^\x{7fff}]{2} (not) + [^\x{7fff}]{0,7}? (not) + [^\x{fffff}]{5} (not) + [^\x{fffff}]?+ (not) Ket End ------------------------------------------------------------------ @@ -1688,15 +1716,15 @@ Partial match: \x{0d}\x{0d} /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf ------------------------------------------------------------------ Bra - /i [^\x{100}]* - /i [^\x{10000}]+ - /i [^\x{10ffff}]?? - /i [^\x{8000}]{4} - /i [^\x{8000}]* - /i [^\x{7fff}]{2} - /i [^\x{7fff}]{0,7}? - /i [^\x{fffff}]{5} - /i [^\x{fffff}]?+ + /i [^\x{100}]* (not) + /i [^\x{10000}]+ (not) + /i [^\x{10ffff}]?? (not) + /i [^\x{8000}]{4} (not) + /i [^\x{8000}]* (not) + /i [^\x{7fff}]{2} (not) + /i [^\x{7fff}]{0,7}? 
(not) + /i [^\x{fffff}]{5} (not) + /i [^\x{fffff}]?+ (not) Ket End ------------------------------------------------------------------ @@ -1818,7 +1846,7 @@ Subject length lower bound = 1 /[abc\p{L}\x{0660}]/IB,utf ------------------------------------------------------------------ Bra - [a-c\p{L}\x{660}] + [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] Ket End ------------------------------------------------------------------ @@ -1842,7 +1870,7 @@ Subject length lower bound = 1 /[\p{Nd}+-]+/IB,utf ------------------------------------------------------------------ Bra - [+\-\p{Nd}]++ + [+\-0-9\p{Nd}]++ Ket End ------------------------------------------------------------------ @@ -1864,7 +1892,15 @@ No match /[\P{Any}]/B ------------------------------------------------------------------ Bra - [\P{Any}] + [] + Ket + End +------------------------------------------------------------------ + +/[^\P{Any}\P{Any}]/B +------------------------------------------------------------------ + Bra + AllAny Ket End ------------------------------------------------------------------ @@ -1872,7 +1908,41 @@ No match /[\P{Any}\E]/B ------------------------------------------------------------------ Bra - [\P{Any}] + [] + Ket + End +------------------------------------------------------------------ + +/\p{Any}#\P{Any}![\p{Any}]:[\P{Any}]@[\p{Any}a-z]%[\P{Any}c]/B,utf +------------------------------------------------------------------ + Bra + AllAny + # + [] + ! + AllAny + : + [] + @ + AllAny + % + [c] + Ket + End +------------------------------------------------------------------ + +/[\P{Any}\P{Any}\P{Any}]![\p{Any}\p{Any}\p{Any}]:[^\P{Any}\P{Any}]@[^\p{Any}\p{Any}]%[^\p{Any}\P{Any}]/B,utf +------------------------------------------------------------------ + Bra + [] + ! 
+ AllAny + : + AllAny + @ + [] + % + [] Ket End ------------------------------------------------------------------ @@ -2023,52 +2093,6 @@ No match \x{200d} No match -# These are here because Perl has problems with the negative versions of the -# properties and has changed how it behaves for caseless matching. - -/\p{^Lu}/i,utf - 1234 - 0: 1 -\= Expect no match - ABC -No match - -/\P{Lu}/i,utf - 1234 - 0: 1 -\= Expect no match - ABC -No match - -/\p{Ll}/i,utf - a - 0: a - Az - 0: z -\= Expect no match - ABC -No match - -/\p{Lu}/i,utf - A - 0: A - a\x{10a0}B - 0: \x{10a0} -\= Expect no match - a -No match - \x{1d00} -No match - -/\p{Lu}/i,utf - A - 0: A - aZ - 0: Z -\= Expect no match - abc -No match - /[\x{c0}\x{391}]/i,utf \x{c0} 0: \x{c0} @@ -3139,7 +3163,7 @@ No match /[^a]*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra - /i [^a]* + /i [^a]* (not) clist 03a3 03c2 03c3 Ket End @@ -3319,7 +3343,7 @@ No match AllAny+ AllAny AllAny+ - notprop Any + [] AllAny+ prop Lc AllAny+ @@ -3807,7 +3831,7 @@ No match /[\p{L}ab]{2,3}+/B,no_auto_possess ------------------------------------------------------------------ Bra - [ab\p{L}]{2,3}+ + [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}]{2,3}+ Ket End ------------------------------------------------------------------ @@ -3974,7 +3998,7 @@ Subject length lower bound = 1 /[A-`]/iB,utf ------------------------------------------------------------------ Bra - [A-z\x{212a}\x{17f}] + [A-z\x{17f}\x{212a}] Ket End ------------------------------------------------------------------ @@ -4057,7 +4081,7 @@ MK: a\x{12345}b\x{09}(d)c ------------------------------------------------------------------ Bra a - [b[:punct:]] + [!-/:-@[-`b{-~\xa1\xa7\xab\xb6\xb7\xbb\xbf[:punct:]] Ket End ------------------------------------------------------------------ @@ -4066,7 +4090,7 @@ MK: a\x{12345}b\x{09}(d)c ------------------------------------------------------------------ Bra a - [b[:punct:]] + 
[!-/:-@[-`b{-~\xa1\xa7\xab\xb6\xb7\xbb\xbf[:punct:]] Ket End ------------------------------------------------------------------ @@ -4075,7 +4099,7 @@ MK: a\x{12345}b\x{09}(d)c ------------------------------------------------------------------ Bra a - [b[:punct:]] + [!-/:-@[-`b{-~\xa1\xa7\xab\xb6\xb7\xbb\xbf[:punct:]] Ket End ------------------------------------------------------------------ @@ -4083,7 +4107,7 @@ MK: a\x{12345}b\x{09}(d)c /[[:^ascii:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra - [\x80-\xff] (neg) + [^\x00-\x7f] Ket End ------------------------------------------------------------------ @@ -4091,7 +4115,7 @@ MK: a\x{12345}b\x{09}(d)c /[[:^ascii:]\w]/utf,ucp,bincode ------------------------------------------------------------------ Bra - [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] + [^\x00-/:-@[-^`{-\x7f] Ket End ------------------------------------------------------------------ @@ -4099,7 +4123,7 @@ MK: a\x{12345}b\x{09}(d)c /[\w[:^ascii:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra - [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] + [^\x00-/:-@[-^`{-\x7f] Ket End ------------------------------------------------------------------ @@ -4107,7 +4131,7 @@ MK: a\x{12345}b\x{09}(d)c /[^[:ascii:]\W]/utf,ucp,bincode ------------------------------------------------------------------ Bra - [^\x00-\x7f\P{Xwd}] + [^\x00-\xa9\xab-\xb1\xb4\xb6-\xb8\xbb\xbf\xd7\xf7\P{Xwd}] Ket End ------------------------------------------------------------------ @@ -4124,7 +4148,7 @@ No match /[[:^ascii:]a]/utf,ucp,bincode ------------------------------------------------------------------ Bra - [a\x80-\xff] (neg) + [^\x00-`b-\x7f] Ket End ------------------------------------------------------------------ @@ -4170,9 +4194,9 @@ No match /[^\D\P{Nd}]/utf a9b 0: 9 - \x{1d7cf} - 0: \x{1d7cf} \= Expect no match + \x{1d7cf} +No match \x{10000} No match @@ -4748,7 +4772,7 @@ Callout 0: last capture = 1 Failed: error 193 
at offset 2: \N{U+dddd} is supported only in Unicode (UTF) mode /\N{U+}/utf -Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} +Failed: error 178 at offset 5: digits missing after \x or in \x{} or \o{} or \N{U+} /\N{U}/ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u @@ -5018,7 +5042,7 @@ Failed: error 147 at offset 8: unknown property after \P or \p # ------------------------------------------------ /\p{\2b[:xäigi:t:_/ -Failed: error 146 at offset 17: malformed \P or \p sequence +Failed: error 146 at offset 10: malformed \P or \p sequence # Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without # the restriction. @@ -5127,7 +5151,7 @@ No match /[Ss]+/iB,utf ------------------------------------------------------------------ Bra - [Ss\x{17f}\x{17f}]++ + [Ss\x{17f}]++ Ket End ------------------------------------------------------------------ @@ -5145,7 +5169,7 @@ No match /[S\x{17f}]/iB,utf ------------------------------------------------------------------ Bra - [Ss\x{17f}\x{17f}] + [Ss\x{17f}] Ket End ------------------------------------------------------------------ @@ -5161,7 +5185,7 @@ No match /[\x{17f}s]/iB,utf ------------------------------------------------------------------ Bra - [Ss\x{17f}\x{17f}] + [Ss\x{17f}] Ket End ------------------------------------------------------------------ @@ -5177,7 +5201,7 @@ No match /[\x{4b}\x{6b}]/iB,utf ------------------------------------------------------------------ Bra - [Kk\x{212a}\x{212a}] + [Kk\x{212a}] Ket End ------------------------------------------------------------------ @@ -5198,8 +5222,478 @@ No match \x{212a}\x{212a} No match +/[sk](?r:[sk])[sk]/Bi,utf +------------------------------------------------------------------ + Bra + [KSks\x{17f}\x{212a}] + Bra + [KSks] + Ket + [KSks\x{17f}\x{212a}] + Ket + End +------------------------------------------------------------------ + SKS + 0: SKS + sks + 0: sks + \x{212a}S\x{17f} + 0: 
\x{212a}S\x{17f} + \x{17f}K\x{212a} + 0: \x{17f}K\x{212a} +\= Expect no match + s\x{212a}s +No match + K\x{17f}K +No match + +/(.) \1/i,utf,caseless_restrict + s S + 0: s S + 1: s + k K + 0: k K + 1: k +\= Expect no match + s \x{17f} +No match + k \x{212a} +No match + +/(.) (?r:\1)/i,utf + s S + 0: s S + 1: s + k K + 0: k K + 1: k +\= Expect no match + s \x{17f} +No match + k \x{212a} +No match + +/(.) \1/i,utf + s S + 0: s S + 1: s + k K + 0: k K + 1: k + s \x{17f} + 0: s \x{17f} + 1: s + k \x{212a} + 0: k \x{212a} + 1: k + +/(?:(?ss)|(?kk)) \k/i,utf,dupnames,caseless_restrict + sS Ss + 0: sS Ss + 1: sS + kK Kk + 0: kK Kk + 1: + 2: kK +\= Expect no match + sS \x{17f}s +No match + kK \x{212a}k +No match + +/(?:(?ss)|(?kk)) \k/i,utf,dupnames + sS Ss + 0: sS Ss + 1: sS + kK Kk + 0: kK Kk + 1: + 2: kK + sS \x{17f}s + 0: sS \x{17f}s + 1: sS + kK \x{212a}k + 0: kK \x{212a}k + 1: + 2: kK + +/(?:(?s)|(?k)) \k{3,}!/i,utf,dupnames,caseless_restrict + s SsSs! + 0: s SsSs! + 1: s + k KkKk! + 0: k KkKk! + 1: + 2: k +\= Expect no match + s \x{17f}sSs\x{17f}! +No match + k \x{212a}kKk\x{212a}! +No match + +/(?:(?s)|(?k)) \k{3,}!/i,utf,dupnames + s SsSs! + 0: s SsSs! + 1: s + k KkKk! + 0: k KkKk! + 1: + 2: k + s \x{17f}sSs\x{17f}! + 0: s \x{17f}sSs\x{17f}! + 1: s + k \x{212a}kKk\x{212a}! + 0: k \x{212a}kKk\x{212a}! + 1: + 2: k + # End caseless restrict tests +# TESTS for PCRE2_EXTRA_TURKISH_CASING - again, tests with and without. 
+ +/i/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/i/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/I/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/I/i,utf,turkish_casing + I + 0: I + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + \x{0130} +No match + +/\x{0130}/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + i +No match + I +No match + \x{0131} +No match + +/\x{0130}/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/\x{0131}/i,utf + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + I +No match + \x{0130} +No match + +/\x{0131}/i,utf,turkish_casing + I + 0: I + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + \x{0130} +No match + +/[i]/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/[i]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[^i]/i,utf + \x{0130} + 0: \x{130} + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + I +No match + +/[^i]/i,utf,turkish_casing + I + 0: I + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + \x{0130} +No match + +/[\x{0130}]/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + i +No match + I +No match + \x{0131} +No match + +/[\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[\x{0120}-\x{0130}]/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + i +No match + I +No match + \x{0131} +No match + +/[\x{0120}-\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[zi]/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/[zi]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: 
\x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[z\x{0130}]/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + i +No match + I +No match + \x{0131} +No match + +/[z\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[iI]/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/[iI]/i,utf,turkish_casing + i + 0: i + I + 0: I + \x{0130} + 0: \x{130} + \x{0131} + 0: \x{131} + +/[i\x{0130}]/i,utf + i + 0: i + I + 0: I + \x{0130} + 0: \x{130} +\= Expect no match + \x{0131} +No match + +/[i\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/(.) \1/i,utf + i I + 0: i I + 1: i +\= Expect no match + i \x{0130} +No match + \x{0131} I +No match + +/(*TURKISH_CASING)(.) \1/i,utf + i \x{0130} + 0: i \x{130} + 1: i + \x{0131} I + 0: \x{131} I + 1: \x{131} +\= Expect no match + i I +No match + +/(.) \1/i,utf,turkish_casing + i \x{0130} + 0: i \x{130} + 1: i + \x{0131} I + 0: \x{131} I + 1: \x{131} +\= Expect no match + i I +No match + +/i/i,utf,caseless_restrict,turkish_casing +Failed: error 206 at offset 0: PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible + +/i/i,turkish_casing +Failed: error 204 at offset 0: PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode + +/i/i,utf,caseless_restrict + i + 0: i + +/i/i,ucp,caseless_restrict + i + 0: i + +/b(?r:[\x{00FF}-\x{FFEE}])/i,utf,turkish_casing + b\x{0130} + 0: b\x{130} + b\x{0131} + 0: b\x{131} +\= Expect no match + bi +No match + bI +No match + bk +No match + +/[\x60-\x7f]/i,ucp + i + 0: i + I + 0: I + +/[\x60-\xc0]/i,ucp + i + 0: i + I + 0: I + +/[\x80-\xc0]/i,ucp +\= Expect no match + i +No match + I +No match + +# End Turkish casing tests + # TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 
# DIGITS @@ -5515,4 +6009,2217 @@ No match (?\777< + abc + 1: >\x{1ff}< + +/a(?b)c/utf,substitute_extended + abc\=replace=>${namED_1}< + 1: >b< + +/a(?b)c/utf,substitute_extended + abc\=replace=>${namedverylongbutperfectlylegalsoyoushouldnthaveaproblem_1}< + 1: >b< + +/a(?b)c/utf,substitute_extended + abc\=replace=>${nämed}< + 1: >b< + +/a(?b)c/utf,substitute_extended + abc\=replace=>${nämedverylongbutperfectlylegalsoyoushouldnthaveaproblem_Ù¢}< + 1: >b< + +# python_octal + +/\400/utf + \o{400} + 0: \x{100} + +/\400/utf,python_octal +Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL) + +/abc/utf,substitute_extended + abc\=replace=\400 + 1: \x{100} + +/abc/utf,substitute_extended,python_octal + abc\=replace=\400 +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +# Character range merging tests + +/[\x{1200}\s\x{1202}\d\x{1201}]+/B,utf,ucp +------------------------------------------------------------------ + Bra + [\p{Xsp}\p{Nd}\x{1200}-\x{1202}]++ + Ket + End +------------------------------------------------------------------ + \x{11ff}\x{1200}\x{1201}\x{1202}\x{1203} + 0: \x{1200}\x{1201}\x{1202} + +/[\x{2000}-\x{2500}\x{2100}-\x{2600}\d\x{1800}-\x{1fff}]+/B,utf,ucp +------------------------------------------------------------------ + Bra + [\p{Nd}\x{1800}-\x{2600}]++ + Ket + End +------------------------------------------------------------------ + \x{17ff}\x{1800}\x{2600}\x{2601} + 0: \x{1800}\x{2600} + +/[\x{10008}\x{10003}\x{10006}\x{10004}\x{10007}]+/B,utf +------------------------------------------------------------------ + Bra + [\x{10003}-\x{10004}\x{10006}-\x{10008}]++ + Ket + End +------------------------------------------------------------------ + \x{10002}\x{10005}\x{10003}\x{10004}\x{10006}\x{10007}\x{10008}\x{10009} + 0: \x{10003}\x{10004}\x{10006}\x{10007}\x{10008} + +/[\x{100}-\x{400}]+/Bi,utf 
+------------------------------------------------------------------ + Bra + [Ss\xb5\xff\x{100}-\x{400}\x{450}\x{1fbe}\x{1fd3}\x{1fe3}\x{2126}\x{2c62}\x{2c64}-\x{2c66}\x{2c6d}-\x{2c70}\x{2c7e}-\x{2c7f}\x{a78d}\x{a7aa}-\x{a7ae}\x{a7b0}-\x{a7b2}\x{a7c5}\x{a7cb}\x{a7dc}]++ + Ket + End +------------------------------------------------------------------ + qS\x{ff}\x{100}\x{a7c5}\x{401} + 0: S\x{ff}\x{100}\x{a7c5} + \x{2c63}\x{2c64}\x{2c65}\x{2c66}\x{2c67} + 0: \x{2c64}\x{2c65}\x{2c66} + \x{a7af}\x{a7b0}\x{a7b1}\x{a7b2}\x{a7b3} + 0: \x{a7b0}\x{a7b1}\x{a7b2} + +/[\x{100}-\x{400}\p{Ll}\x{500}-\x{700}\p{OldHungarian}\x{701}\p{bidiLRI}]/B,utf +------------------------------------------------------------------ + Bra + [\p{Ll}\p{Oldhungarian}\p{Bidilri}\x{100}-\x{400}\x{500}-\x{701}] + Ket + End +------------------------------------------------------------------ + +/[\pC\x{100}-\x{200}\h\pN]/B,utf +------------------------------------------------------------------ + Bra + [\x00- 0-9\x7f-\xa0\xad\xb2\xb3\xb9\xbc-\xbe\p{C}\p{N}\x{100}-\x{200}\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + +/[\pC\x{100}-\x{200}\v\pN]/B,utf +------------------------------------------------------------------ + Bra + [\x00-\x1f0-9\x7f-\x9f\xad\xb2\xb3\xb9\xbc-\xbe\p{C}\p{N}\x{100}-\x{200}\x{2028}-\x{2029}] + Ket + End +------------------------------------------------------------------ + +/[\pC\x{100}-\x{200}\H\pN]/B,utf +------------------------------------------------------------------ + Bra + [\x00-\x1f!-\x9f\xa1-\xff\p{C}\p{N}\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[\pC\x{100}-\x{200}\V\pN]/B,utf +------------------------------------------------------------------ + Bra + [\x00-\xff\p{C}\p{N}\x{100}-\x{2027}\x{202a}-\x{10ffff}] + Ket + End 
+------------------------------------------------------------------ + +/[\x{16e49}-\x{16e4f}\x{20000}\x{16e40}-\x{16e48}\pN]/Bi,utf +------------------------------------------------------------------ + Bra + [\p{N}\x{16e40}-\x{16e4f}\x{16e60}-\x{16e6f}\x{20000}] + Ket + End +------------------------------------------------------------------ + +/[\x80-\x{4000}\x90\x{400}-\x{f000}\xa0\x{4000}-\x{10ffff}]++/B,utf +------------------------------------------------------------------ + Bra + [^\x00-\x7f]++ + Ket + End +------------------------------------------------------------------ + \x{7f}\x{80}\x{100}\x{10fffe}\x{10ffff}\x00 + 0: \x{80}\x{100}\x{10fffe}\x{10ffff} + +/[\x80-\x{4000}\x90\x{400}-\x{f000}\xa0\pN\x{4000}-\x{10ffff}]++/B,utf +------------------------------------------------------------------ + Bra + [^\x00-/:-\x7f]++ + Ket + End +------------------------------------------------------------------ + \x{7f}\x{80}\x{100}090\x{10fffe}\x{10ffff}\x00 + 0: \x{80}\x{100}090\x{10fffe}\x{10ffff} + +/[\x00-\x{4000}\x{2000}-\x{10ffff}]++/B,utf +------------------------------------------------------------------ + Bra + AllAny++ + Ket + End +------------------------------------------------------------------ + abcd + 0: abcd + +/[abc\p{Any}]{5,7}/B,utf +------------------------------------------------------------------ + Bra + AllAny{5} + AllAny{0,2}+ + Ket + End +------------------------------------------------------------------ + xyz +No match + +/[^\p{Any}\x34\p{Any}]*cat/B,utf +------------------------------------------------------------------ + Bra + []*+ + cat + Ket + End +------------------------------------------------------------------ + cat + 0: cat + +/[\pN\xf0-\x{10ffff}]{5,8}/B,utf +------------------------------------------------------------------ + Bra + [^\x00-/:-\xb1\xb4-\xb8\xba\xbb\xbf-\xef]{5,8}+ + Ket + End +------------------------------------------------------------------ + ab0123456cd + 0: 0123456 + 
+/[\x00-\x{398}\x{39a}-\x{10ffff}]*#(?i)[\x00-\x{398}\x{39a}-\x{10ffff}]*?#/B,utf +------------------------------------------------------------------ + Bra + [\x00-\xff\x{100}-\x{398}\x{39a}-\x{10ffff}]* + # + AllAny*? + /i # + Ket + End +------------------------------------------------------------------ + abcd#efg# + 0: abcd#efg# + +# Freeing memory on error test +/[\x{100}-\x{400}][\x{100}-\x{300}][\x{100}-\x{200}]\8/i,utf +Failed: error 115 at offset 52: reference to non-existent subpattern + +# Character list tests + +/[\x{100}-\x{7fff}\x{d7b0}\x{d7b1}\x{d7b3}\x{d7b4}\x{d7b6}\x{d7b7}\x{d7b9}\x{d7ba}]{12}/B,utf +------------------------------------------------------------------ + Bra + [\x{100}-\x{7fff}\x{d7b0}-\x{d7b1}\x{d7b3}-\x{d7b4}\x{d7b6}-\x{d7b7}\x{d7b9}-\x{d7ba}]{12,12}+ + Ket + End +------------------------------------------------------------------ + \x{8000}\x{d7af}\x{d7b2}\x{d7b5}\x{d7b8}\x{d7bb}\x{100}\x{800}\x{7000}\x{7fff}\x{d7b0}\x{d7b1}\x{d7b3}\x{d7b4}\x{d7b6}\x{d7b7}\x{d7b9}\x{d7ba}\x{100} + 0: \x{100}\x{800}\x{7000}\x{7fff}\x{d7b0}\x{d7b1}\x{d7b3}\x{d7b4}\x{d7b6}\x{d7b7}\x{d7b9}\x{d7ba} + +/([\x{6535}\x{6536}\x{6538}\x{6539}\x{653b}\x{653c}\x{653e}\x{653f}\x{6541}\x{6542}\x{8000}-\x{ffff}]#)+/B,utf +------------------------------------------------------------------ + Bra + CBra 1 + [\x{6535}-\x{6536}\x{6538}-\x{6539}\x{653b}-\x{653c}\x{653e}-\x{653f}\x{6541}-\x{6542}\x{8000}-\x{ffff}] + # + KetRmax + Ket + End +------------------------------------------------------------------ + \x{6534}#\x{6537}#\x{653a}#\x{653d}#\x{6540}#\x{6543}#\x{7fff}#\x{6535}#\x{6536}#\x{6538}#\x{6539}#\x{653b}#\x{653c}#\x{653e}#\x{653f}#\x{6541}#\x{6542}#\x{8000}#\x{c246}#\x{ffff} + 0: \x{6535}#\x{6536}#\x{6538}#\x{6539}#\x{653b}#\x{653c}#\x{653e}#\x{653f}#\x{6541}#\x{6542}#\x{8000}#\x{c246}# + 1: \x{c246}# + +/[[:xdigit:]\x{400}-\x{600}]+/utf,ucp + !a0\x{400}\x{600}9\x{3ff} + 0: a0\x{400}\x{600}9 + +/[^[:xdigit:]\x{400}-\x{600}]+/utf,ucp + 
\x{400}(\x{3ff}\x{601})\x{600} + 0: (\x{3ff}\x{601}) + +/[[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp + !A0\x{700}9\x{601} + 0: A0\x{700}9 + +/[^[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp + \x{600}(\x{6ff}\x{701}\x{3ff}\x{601})\x{700} + 0: (\x{6ff}\x{701}\x{3ff}\x{601}) + +/[[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp + !f0\x{800}\x{600}9\x{601} + 0: f0\x{800}\x{600}9 + +/[^[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp + \x{700}[\x{3ff}\x{601}\x{6ff}\x{801}\x{8ff}\x{901}]\x{900} + 0: [\x{3ff}\x{601}\x{6ff}\x{801}\x{8ff}\x{901}] + +/[[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp + !F0\x{400}\x{410}\x{500}\x{600}\x{610}\x{700}\x{800}\x{810}9\x{7ff} + 0: F0\x{400}\x{410}\x{500}\x{600}\x{610}\x{700}\x{800}\x{810}9 + +/[^[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp + \x{800}<\x{3ff}\x{411}\x{4ff}\x{501}\x{5ff}\x{611}\x{6ff}\x{701}\x{7ff}\x{811}>\x{810} + 0: <\x{3ff}\x{411}\x{4ff}\x{501}\x{5ff}\x{611}\x{6ff}\x{701}\x{7ff}\x{811}> + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +/[\p{Lu}[\p{Nd}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Lu}] + xclass: [\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + C + 0: C +\= Expect no match + [ +No match + a +No match + +/[[\pL][\p{Nd}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{L}] + xclass: [\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + a + 0: a +\= Expect no match + [ +No match + ] +No match + +/[[\p{Lu}]||[\p{Nd}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Lu}] + xclass: [\p{Nd}] + OR + ] + Ket + End 
+------------------------------------------------------------------ + A + 0: A + 1 + 0: 1 +\= Expect no match + a +No match + +/[[^\pL][\p{Nd}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [^\p{L}] + xclass: [\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + . + 0: . +\= Expect no match + A +No match + +/[^[\pL][\p{Nd}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [^\p{L}] + xclass: [^\p{Nd}] + AND + ] + Ket + End +------------------------------------------------------------------ + . + 0: . +\= Expect no match + A +No match + 0 +No match + +/[^[\pL]&&[\p{Nd}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [^\p{L}] + xclass: [^\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + A + 0: A + 0 + 0: 0 + +/[[\p{Lu}\p{Ll}]||[\p{Nd}\p{Ll}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Lu}\p{Ll}] + xclass: [\p{Nd}\p{Ll}] + OR + ] + Ket + End +------------------------------------------------------------------ + A + 0: A + 1 + 0: 1 + c + 0: c +\= Expect no match + _ +No match + +/[[\p{Lu}\p{Ll}]&&[\p{Nd}\p{Ll}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Lu}\p{Ll}] + xclass: [\p{Nd}\p{Ll}] + AND + ] + Ket + End +------------------------------------------------------------------ + c + 0: c +\= Expect no match + A +No match + 1 +No match + _ +No match + +/[[\p{Lu}\p{Ll}]--[\p{Nd}\p{Ll}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Lu}\p{Ll}] + xclass: [^\p{Nd}\p{Ll}] + AND 
+ ] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + 1 +No match + c +No match + _ +No match + +/[[\p{Lu}\p{Ll}]~~[\p{Nd}\p{Ll}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Lu}\p{Ll}] + xclass: [\p{Nd}\p{Ll}] + XOR + ] + Ket + End +------------------------------------------------------------------ + A + 0: A + 1 + 0: 1 +\= Expect no match + c +No match + _ +No match + +/[\pL[]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A-Z\]a-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + A + 0: A + ] + 0: ] +\= Expect no match + [ +No match + +/[\pL[^]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^\]] + Ket + End +------------------------------------------------------------------ + A + 0: A + [ + 0: [ + 0 + 0: 0 +\= Expect no match + ] +No match + +/[\pL[]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + ] +No match + [ +No match + +/[\pL[^]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + A + 0: A + 0 + 0: 0 + [ + 0: [ + ] + 0: ] + +/[\dAC-E[:space:]\p{Lu}&&[^z]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\x09-\x0d 0-9A-Z\xc0-\xd6\xd8-\xde\p{Lu}] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + A + 0: A + C + 0: C + D + 0: D + E + 0: E + \t + 0: \x09 +\= Expect no match + a +No match + ; +No match + 
+/[z||[^\dAC-E[:space:]\p{Lu}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^\x09-\x0d 0-9A-Z\xc0-\xd6\xd8-\xde\p{Lu}] + Ket + End +------------------------------------------------------------------ + z + 0: z + ; + 0: ; +\= Expect no match + 0 +No match + A +No match + C +No match + D +No match + E +No match + B +No match + F +No match + \t +No match + +/[\p{Lu}\p{Nd}||cd]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [0-9A-Zcd\xc0-\xd6\xd8-\xde\p{Lu}\p{Nd}] + Ket + End +------------------------------------------------------------------ + A + 0: A + 0 + 0: 0 + c + 0: c +\= Expect no match + e +No match + +/[[\p{Lu}]\p{Nd}||[c]d]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [0-9A-Zcd\xc0-\xd6\xd8-\xde] + xclass: [\p{Lu}] + xclass: [\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + A + 0: A + 0 + 0: 0 + c + 0: c +\= Expect no match + e +No match + +/[\p{Lu}[\p{Nd}]||c[d]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [0-9A-Zcd\xc0-\xd6\xd8-\xde] + xclass: [\p{Lu}] + xclass: [\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + A + 0: A + 0 + 0: 0 + c + 0: c +\= Expect no match + e +No match + +/[\p{Lu}-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-A-Z\xc0-\xd6\xd8-\xde\p{Lu}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + a +No match + +/[-\p{Lu}]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-A-Z\xc0-\xd6\xd8-\xde\p{Lu}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + a +No 
match + +/[\pL-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + 0 +No match + +/[-\pL]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + 0 +No match + +/[\p{Lu}-]/B +------------------------------------------------------------------ + Bra + [\-A-Z\xc0-\xd6\xd8-\xde\p{Lu}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + a +No match + +/[-\p{Lu}]/B +------------------------------------------------------------------ + Bra + [\-A-Z\xc0-\xd6\xd8-\xde\p{Lu}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + a +No match + +/[\pL-]/B +------------------------------------------------------------------ + Bra + [\-A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + 0 +No match + +/[-\pL]/B +------------------------------------------------------------------ + Bra + [\-A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + A + 0: A + - + 0: - +\= Expect no match + 0 +No match + +/[\p{Lu}-z]/B,alt_extended_class +Failed: error 150 at offset 8: invalid range in character class + +/[z-\p{Lu}]/B,alt_extended_class +Failed: error 150 at offset 9: invalid range in character class + +/[\pL-z]/B,alt_extended_class +Failed: error 150 at offset 5: invalid range in character class + 
+/[z-\pL]/B,alt_extended_class +Failed: error 150 at offset 6: invalid range in character class + +/[\p{Lu}-&&-\pL]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [\-A-Z\xc0-\xd6\xd8-\xde] + xclass: [\p{Lu}] + xclass: [\p{L}] + AND + ] + Ket + End +------------------------------------------------------------------ + - + 0: - + A + 0: A +\= Expect no match + a +No match + +/[-\p{Lu}&&\pL-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [\-A-Z\xc0-\xd6\xd8-\xde] + xclass: [\p{Lu}] + xclass: [\p{L}] + AND + ] + Ket + End +------------------------------------------------------------------ + - + 0: - + A + 0: A +\= Expect no match + a +No match + +/[[\p{Lu}]-&&-[\pL]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [\-A-Z\xc0-\xd6\xd8-\xde] + xclass: [\p{Lu}] + xclass: [\p{L}] + AND + ] + Ket + End +------------------------------------------------------------------ + - + 0: - + A + 0: A +\= Expect no match + a +No match + +/[-[\p{Lu}]&&[\pL]-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [\-A-Z\xc0-\xd6\xd8-\xde] + xclass: [\p{Lu}] + xclass: [\p{L}] + AND + ] + Ket + End +------------------------------------------------------------------ + - + 0: - + A + 0: A +\= Expect no match + a +No match + +/(?xx:[ ^ 5[ ^ \p{Nd}] ])/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + [0-46-9\p{Nd}] + Ket + Ket + End +------------------------------------------------------------------ + 4 + 0: 4 +\= Expect no match + a +No match + ; +No match + 5 +No match + +/(?xx:[ ^ \p{Nd}[ ^ 5] ])/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + [] + Ket + Ket + End 
+------------------------------------------------------------------ +\= Expect no match + a +No match + ; +No match + 4 +No match + 5 +No match + +/(?xx:[ ^ \p{Nd}[ ^ \p{Nd}] ])/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + eclass[ + no bitmap + xclass: [^\p{Nd}] + xclass: [\p{Nd}] + AND + ] + Ket + Ket + End +------------------------------------------------------------------ +\= Expect no match + a +No match + ; +No match + 4 +No match + 5 +No match + +/[ ^ \p{Ll}[ ^ \p{Nd}] ]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [ 0-9^a-z\xb5\xdf-\xf6\xf8-\xff] + xclass: [\p{Ll}] + xclass: [\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + \x20 + 0: + ^ + 0: ^ + a + 0: a + 0 + 0: 0 +\= Expect no match + A +No match + ; +No match + +/[a-c--\p{Nd}]+/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-c]++ + Ket + End +------------------------------------------------------------------ + ac + 0: ac + a + 0: a +\= Expect no match + 0 +No match + +/[a-c--\p{Nd}]{2,3}/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-c]{2,3}+ + Ket + End +------------------------------------------------------------------ + ac + 0: ac + cac + 0: cac +\= Expect no match + a +No match + 00 +No match + +/x[a-c--\p{Nd}]+y/B,alt_extended_class +------------------------------------------------------------------ + Bra + x + [a-c]++ + y + Ket + End +------------------------------------------------------------------ + xacy + 0: xacy + xaay + 0: xaay + xay + 0: xay +\= Expect no match + zacy +No match + xacz +No match + xy +No match + x0y +No match + +/[\pL--\pL--\pL]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{L}] + xclass: [^\p{L}] + AND + xclass: 
[^\p{L}] + AND + ] + Ket + End +------------------------------------------------------------------ +\= Expect no match + A +No match + 1 +No match + +/[[\pL--\pL]--\pL]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{L}] + xclass: [^\p{L}] + AND + xclass: [^\p{L}] + AND + ] + Ket + End +------------------------------------------------------------------ +\= Expect no match + A +No match + 1 +No match + +/[\pL--[\pL--\pL]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{L}] + xclass: [^\p{L}] + xclass: [\p{L}] + OR + AND + ] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + 1 +No match + +/[\pL--^\p{Nd}]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff] + xclass: [\p{L}] + xclass: [^\p{Nd}] + AND + ] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + 1 +No match + ^ +No match + +/([a-z--[\pL&&n]])\1/B,alt_extended_class +------------------------------------------------------------------ + Bra + CBra 1 + [a-mo-z] + Ket + \1 + Ket + End +------------------------------------------------------------------ + aa + 0: aa + 1: a + zz + 0: zz + 1: z +\= Expect no match + az +No match + nn +No match + +/(x[a-z--[\pL&&n]]y)\1/B,alt_extended_class +------------------------------------------------------------------ + Bra + CBra 1 + x + [a-mo-z] + y + Ket + \1 + Ket + End +------------------------------------------------------------------ + xayxay + 0: xayxay + 1: xay + xzyxzy + 0: xzyxzy + 1: xzy +\= Expect no match + xnyxny +No match + +/(?:_\1|([a-z--[\pL&&n]])){2}/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + _ + \1 + Alt + 
CBra 1 + [a-mo-z] + Ket + Ket + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + Ket + Ket + End +------------------------------------------------------------------ + a_a + 0: a_a + 1: a + z_z + 0: z_z + 1: z +\= Expect no match + a_z +No match + n_n +No match + +/(?:_\1|([a-z--[\pL&&n]]))+/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + KetRmax + Ket + End +------------------------------------------------------------------ + a_a + 0: a_a + 1: a + z_z + 0: z_z + 1: z + a_partial + 0: a + 1: a +\= Expect no match + n_n +No match + +/[\p{Nd}||[\pL--\p{Lu}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Nd}] + xclass: [\p{L}] + xclass: [^\p{Lu}] + AND + OR + ] + Ket + End +------------------------------------------------------------------ + a + 0: a + 0 + 0: 0 +\= Expect no match + C +No match + +/[\P{Nd}||2]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\x00-/2:-\xff\P{Nd}] + Ket + End +------------------------------------------------------------------ + _ + 0: _ + Z + 0: Z + 2 + 0: 2 +\= Expect no match + 1 +No match + 3 +No match + +/[^[\P{Nd}]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^\P{Nd}] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + 2 + 0: 2 +\= Expect no match + _ +No match + z +No match + +# caseless tests + +/[\p{Lu}~~\p{Ll}]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Lu}] + xclass: [\p{Ll}] + XOR + ] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A +\= Expect no match + _ +No match + 1 +No match + +/[[\p{Lu}1]~~\p{Ll}]/iB,alt_extended_class 
+------------------------------------------------------------------ + Bra + eclass[ + bitmap: [1] + xclass: [\p{Lc}] + xclass: [\p{Lc}] + XOR + ] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 +\= Expect no match + a +No match + A +No match + _ +No match + +/[[\p{Lu}1]&&[\p{Ll}1]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [1] + xclass: [\p{Lu}] + xclass: [\p{Ll}] + AND + ] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 +\= Expect no match + a +No match + A +No match + _ +No match + 2 +No match + +/[[\p{Lu}1]&&[\p{Ll}1]]/iB,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [1A-Za-z\xb5\xc0-\xd6\xd8-\xf6\xf8-\xff] + xclass: [\p{Lc}] + xclass: [\p{Lc}] + AND + ] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A + 1 + 0: 1 +\= Expect no match + _ +No match + 2 +No match + \ +No match + +/[\p{Thai}&&\p{Nd}]/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ] + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} +\= Expect no match + 0 +No match + a +No match + \x{0e01} +No match + +/[\p{Thai}||\p{Nd}]/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + OR + ] + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} + \x{0e01} + 0: \x{e01} + 0 + 0: 0 +\= Expect no match + a +No match + +/[\p{Thai}~~\p{Nd}]/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + XOR + ] + Ket 
+ End +------------------------------------------------------------------ + \x{0e01} + 0: \x{e01} + 0 + 0: 0 +\= Expect no match + \x{0e51} +No match + a +No match + +/[[\p{Thai}&&\p{Nd}]~~[^a]]/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + eclass[ + bitmap: [^a] + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + NOT + ] + Ket + End +------------------------------------------------------------------ + \x{0e01} + 0: \x{e01} + b + 0: b + 0 + 0: 0 +\= Expect no match + a +No match + \x{0e51} +No match + +/^[\p{Thai}&&\p{Nd}]?$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]? + $ + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} + \ + 0: +\= Expect no match + a +No match + +/^[\p{Thai}&&\p{Nd}]??$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]?? 
+ $ + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} + \ + 0: +\= Expect no match + a +No match + +/^[\p{Thai}&&\p{Nd}]?+$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]?+ + $ + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} + \ + 0: +\= Expect no match + a +No match + +/^[\p{Thai}&&\p{Nd}]{3}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]{3,3} + $ + Ket + End +------------------------------------------------------------------ + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51} +No match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]{3,}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]{3,} + $ + Ket + End +------------------------------------------------------------------ + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51}\x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51} +No match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]{3,}?$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]{3,}? 
+ $ + Ket + End +------------------------------------------------------------------ + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51}\x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51} +No match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]{3,}+$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]{3,}+ + $ + Ket + End +------------------------------------------------------------------ + \x{0e51}\x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51}\x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51} +No match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]{,3}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]{0,3} + $ + Ket + End +------------------------------------------------------------------ + \ + 0: + \x{0e51} + 0: \x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51}\x{0e51}\x{0e51}\x{0e51} +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]{,3}?$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]{0,3}? 
+ $ + Ket + End +------------------------------------------------------------------ + \ + 0: + \x{0e51} + 0: \x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51}\x{0e51}\x{0e51}\x{0e51} +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]{,3}+$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]{0,3}+ + $ + Ket + End +------------------------------------------------------------------ + \ + 0: + \x{0e51} + 0: \x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51}\x{0e51}\x{0e51}\x{0e51} +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]+\x{0e51}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]+ + \x{e51} + $ + Ket + End +------------------------------------------------------------------ + \x{0e51}\x{0e51} + 0: \x{e51}\x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51} +No match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]+?\x{0e51}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]+? 
+ \x{e51} + $ + Ket + End +------------------------------------------------------------------ + \x{0e51}\x{0e51} + 0: \x{e51}\x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \x{0e51} +No match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]++\x{0e51}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]++ + \x{e51} + $ + Ket + End +------------------------------------------------------------------ +\= Expect no match + \x{0e51} +No match + \x{0e51}\x{0e51} +No match + \x{0e51}\x{0e51}\x{0e51} +No match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]*\x{0e51}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]* + \x{e51} + $ + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} + \x{0e51}\x{0e51} + 0: \x{e51}\x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]*?\x{0e51}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]*? 
+ \x{e51} + $ + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} + \x{0e51}\x{0e51} + 0: \x{e51}\x{e51} + \x{0e51}\x{0e51}\x{0e51} + 0: \x{e51}\x{e51}\x{e51} +\= Expect no match + \ +No match + a +No match + +/^[\p{Thai}&&\p{Nd}]*+\x{0e51}$/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + ^ + eclass[ + no bitmap + xclass: [\p{Thai}] + xclass: [\p{Nd}] + AND + ]*+ + \x{e51} + $ + Ket + End +------------------------------------------------------------------ +\= Expect no match + \x{0e51} +No match + \x{0e51}\x{0e51} +No match + \x{0e51}\x{0e51}\x{0e51} +No match + \ +No match + a +No match + +/[^[^\p{Thai}]]/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + [\p{Thai}] + Ket + End +------------------------------------------------------------------ + \x{0e51} + 0: \x{e51} +\= Expect no match + a +No match + +/[^[^\p{L}]]/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ + \x{0e01} + 0: \x{e01} + a + 0: a +\= Expect no match + 0 +No match + +/[\pL&&[^\x00-\xFF]]/B,utf,alt_extended_class +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ + \x{21e} + 0: \x{21e} +\= Expect no match + a +No match + +/[\pL&&\x{100}-\x{1000}]{3,6}+/utf,alt_extended_class + \x{145}\x{18b}A\x{145}\x{18b}\x{1C2}\x{21a}\x{257}\x{2ae}\x{0145}\x{18b} + 0: \x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae} + \x{145}A\x{145}\x{18b}\x{1C2}B + 0: \x{145}\x{18b}\x{1c2} + +/[\pL&&\x{100}-\x{1000}]{3,6}\x{2A3}/utf,alt_extended_class + \x{145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{2a3} + 0: \x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{2a3} + \x{145}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3} + 0: 
\x{145}\x{18b}\x{1c2}\x{2a3} + \x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3}\x{2a3} + 0: \x{145}\x{18b}\x{1c2}\x{2a3}\x{2a3} + \x{0145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{145}\x{2a3} + 0: \x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{145}\x{2a3} + +/[\pL&&\x{100}-\x{1000}]{3,6}?\x{2A3}/utf,alt_extended_class + \x{145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{2a3} + 0: \x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{2a3} + \x{145}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3} + 0: \x{145}\x{18b}\x{1c2}\x{2a3} + \x{2a3}A\x{145}\x{18b}\x{1c2}\x{2a3}\x{2a3} + 0: \x{145}\x{18b}\x{1c2}\x{2a3} + \x{0145}\x{18b}\x{2a3}A\x{145}\x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{145}\x{2a3} + 0: \x{18b}\x{1c2}\x{21a}\x{257}\x{2ae}\x{145}\x{2a3} + +/[\P{scx=Beng}\P{scx=Deva}\pM--[\x{2000}-\x{3000}]]+/utf,alt_extended_class + \x{964}\x{2000}\x{3000}A\x{951}\x{1fff}\x{3001}\x{965} + 0: A\x{951}\x{1fff}\x{3001} + +/[\p{Thai}~~[^]]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [^\p{Thai}] + Ket + End +------------------------------------------------------------------ + \x{0d01} + 0: \x{d01} + a + 0: a +\= Expect no match + \x{0e01} +No match + +/[[]~~[^]]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + \x{0d01} + 0: \x{d01} + a + 0: a + +/[[^]~~[]]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + \x{0d01} + 0: \x{d01} + a + 0: a + +/[[^]~~[^]]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [] + Ket + End +------------------------------------------------------------------ +\= Expect no match + \x{0d01} +No match + a +No match + 
+/[[^]||\pL]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + a + 0: a + +/[\pL||[^]]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + a + 0: a + +/[\pL~~[^]]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [^\p{L}] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 +\= Expect no match + a +No match + +/[[^]~~\pL]/B,utf,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [^\p{L}] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 +\= Expect no match + a +No match + +/([\p{Lu}&&\p{sc=Hung}]+?\x{10c81})+#/utf,alt_extended_class + \x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10cf2}\x{10c81}#\x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10c81}## + 0: \x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10c81}# + 1: \x{10c85}\x{10cb0}\x{10c81} + +/[[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] 
+&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] 
+&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] 
+&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]] +&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]]/utf,alt_extended_class + +# -------------- + +/^([\h\x{9000}\x{9002}\x{9004}][\v\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}][\h\v\x{9000}],){4}$/B,utf +------------------------------------------------------------------ + Bra + ^ + CBra 1 + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{9000}\x{9002}\x{9004}] + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}] + [\x09-\x0d \x85\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}-\x{2029}\x{202f}\x{205f}\x{3000}\x{9000}] + , + Ket + CBra 1 + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{9000}\x{9002}\x{9004}] + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}] + [\x09-\x0d \x85\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}-\x{2029}\x{202f}\x{205f}\x{3000}\x{9000}] + , + Ket + CBra 1 + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{9000}\x{9002}\x{9004}] + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}] + [\x09-\x0d 
\x85\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}-\x{2029}\x{202f}\x{205f}\x{3000}\x{9000}] + , + Ket + CBra 1 + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{9000}\x{9002}\x{9004}] + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}] + [\x09-\x0d \x85\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}-\x{2029}\x{202f}\x{205f}\x{3000}\x{9000}] + , + Ket + $ + Ket + End +------------------------------------------------------------------ + \x09\x0a\x0d,\x{1680}\x{2028}\x{1680},\x{180e}\x{2029}\x{180e},\x{9000}\x{9000}\x{9000}, + 0: \x{09}\x{0a}\x{0d},\x{1680}\x{2028}\x{1680},\x{180e}\x{2029}\x{180e},\x{9000}\x{9000}\x{9000}, + 1: \x{9000}\x{9000}\x{9000}, + +/[z-\p{Lu}]/ +Failed: error 150 at offset 9: invalid range in character class + +/[z-\pL]/ +Failed: error 150 at offset 6: invalid range in character class + +/[\p{Lu}-z]/ +Failed: error 150 at offset 8: invalid range in character class + +/[\pL-z]/ +Failed: error 150 at offset 5: invalid range in character class + +/[a\x{e1}]/iB +------------------------------------------------------------------ + Bra + [Aa\xe1] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A + \x{e1} + 0: \xe1 + +/[a\x{e1}]/iB,utf +------------------------------------------------------------------ + Bra + [Aa\xc1\xe1] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A + \x{e1} + 0: \x{e1} + \x{c1} + 0: \x{c1} + +/[a\x{e1}]/iB,ucp +------------------------------------------------------------------ + Bra + [Aa\xc1\xe1] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A + \x{e1} + 0: \xe1 + \x{c1} + 0: \xc1 + +/[a\x{e1}]/iB,ucp,utf +------------------------------------------------------------------ + Bra + [Aa\xc1\xe1] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A + \x{e1} + 0: \x{e1} + \x{c1} + 0: 
\x{c1} + # End of testinput5 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 5af9f46..74c40e9 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -6072,8 +6072,8 @@ Subject length lower bound = 0 /\w+(.)(.)?def/Is Capture group count = 2 Options: dotall -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Last code unit = 'f' Subject length lower bound = 5 abc\ndef @@ -6569,9 +6569,8 @@ No match No match /a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames -\= Expect no match ab -No match + 0: ab /a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames aXb @@ -6769,11 +6768,24 @@ Partial match: dogs /Z(*F)Q|ZXY/ Z\=ps Partial match: Z + XY\=dfa_restart + 0: XY \= Expect no match ZA\=ps No match X\=ps No match + +/Z(?:(*F)Q|XY)/ + Z\=ps +Partial match: Z + XY\=dfa_restart + 0: XY + +/Z(*F)Q|Z(*F)XY/ +\= Expect no match + Z\=ps +No match /\bthe cat\b/ the cat\=ps @@ -6834,7 +6846,7 @@ Partial match: +ab /(abc|def|xyz)/I Capture group count = 1 -Starting code units: a d x +Starting code units: a d x Subject length lower bound = 3 terhjk;abcdaadsfe 0: abc @@ -6847,6 +6859,7 @@ No match /(abc|def|xyz)/I,no_start_optimize Capture group count = 1 Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor terhjk;abcdaadsfe 0: abc the quick xyz brown fox @@ -7005,7 +7018,7 @@ No match abc\=offset=4 Failed: error -33: bad offset value abc\=offset=-4 -** Invalid value in 'offset=-4' +** Invalid value in "offset=-4" \= Expect no match abc\=offset=3 No match @@ -7437,10 +7450,10 @@ Get substring 4 failed (-54): requested value is not available /(?aa)/ aa\=get=A 0: aa -Get substring 'A' failed (-41): function is not supported for DFA matching +Get substring "A" failed (-41): 
function is not supported for DFA matching aa\=copy=A 0: aa -Copy substring 'A' failed (-41): function is not supported for DFA matching +Copy substring "A" failed (-41): function is not supported for DFA matching /a+/no_auto_possess a\=ovector=2,get=1,get=2,getall @@ -7920,4 +7933,235 @@ Partial match: 3: a 4: +/([a-z]++)(*scs:(1).)/ + aa +Failed: error -42: pattern contains an item that is not supported for DFA matching + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +/[a[]/ + [ + 0: [ + +/[a[B]]C/alt_extended_class + aC + 0: aC + BC + 0: BC +\= Expect no match + [C +No match + +/[[A][B]]/alt_extended_class + A + 0: A + B + 0: B +\= Expect no match + [ +No match + ] +No match + +/[[A]||[B]]/alt_extended_class + A + 0: A + B + 0: B +\= Expect no match + C +No match + +/[[^A][B]]/alt_extended_class + B + 0: B + C + 0: C +\= Expect no match + A +No match + +/[^[A][B]]/alt_extended_class + C + 0: C +\= Expect no match + A +No match + B +No match + +/[^[A]&&[B]]/alt_extended_class + A + 0: A + B + 0: B + C + 0: C + +/[A[]]]/alt_extended_class + A + 0: A + ] + 0: ] +\= Expect no match + [ +No match + +/[A[^]]]/alt_extended_class + A + 0: A + [ + 0: [ + C + 0: C +\= Expect no match + ] +No match + +/[A[]]/alt_extended_class,allow_empty_class + A + 0: A +\= Expect no match + ] +No match + [ +No match + +/[A[^]]/alt_extended_class,allow_empty_class + A + 0: A + C + 0: C + [ + 0: [ + ] + 0: ] + +/[A-C--B]/alt_extended_class + A + 0: A + C + 0: C +\= Expect no match + B +No match + +/[^A-C--B]/alt_extended_class + B + 0: B +\= Expect no match + A +No match + C +No match + +/[[\d\D]--b]/alt_extended_class + a + 0: a + c + 0: c +\= Expect no match + b +No match + +/[\dAC-E[:space:]&&[^z]]/alt_extended_class + 0 + 0: 0 + A + 0: A + C + 0: C + D + 0: D + E + 0: E + \t + 0: \x09 +\= Expect no match + B +No match + F +No match + ; +No match + +/[z||[^\dAC-E[:space:]]]/alt_extended_class + z + 0: z + B + 0: B + F + 0: F + ; + 0: ; +\= Expect no match + 0 +No match + 
A +No match + C +No match + D +No match + E +No match + \t +No match + +/[a-c--b]+/alt_extended_class + ac + 0: ac + a + 0: a +\= Expect no match + b +No match + +/[a-c--b]{2,3}/alt_extended_class + ac + 0: ac + cac + 0: cac +\= Expect no match + a +No match + bb +No match + +/x[a-c--b]+y/alt_extended_class + xacy + 0: xacy + xaay + 0: xaay + xay + 0: xay +\= Expect no match + zacy +No match + xacz +No match + xy +No match + xby +No match + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +/(?[[A]+[B]])/ + A + 0: A + B + 0: B +\= Expect no match + [ +No match + ] +No match + +# -------------- + # End of testinput6 diff --git a/testdata/testoutput7 b/testdata/testoutput7 index cfa1881..aa36e9c 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -1814,17 +1814,6 @@ No match \x{660}\x{661}\x{662}ABC No match -/\p{Lu}/i,utf - A - 0: A - a\x{10a0}B - 0: \x{10a0} -\= Expect no match - a -No match - \x{1d00} -No match - /\p{^Lu}/i,utf 1234 0: 1 @@ -2007,42 +1996,6 @@ No match A 0: A -/\p{Lu}/utf - A - 0: A - aZ - 0: Z -\= Expect no match - abc -No match - -/\p{Lu}/i,utf - A - 0: A - aZ - 0: Z -\= Expect no match - abc -No match - -/\p{Ll}/utf - a - 0: a - Az - 0: z -\= Expect no match - ABC -No match - -/\p{Ll}/i,utf - a - 0: a - Az - 0: z -\= Expect no match - ABC -No match - /^\x{c0}$/i,utf \x{c0} 0: \x{c0} @@ -2867,7 +2820,7 @@ No match !\x{c0}+++++ 0: \x{c0}++ -# Without PCRE_UCP, non-ASCII always fail, even if < 256 +# Without PCRE2_UCP, non-ASCII always fail, even if < 256 /\b...\B/utf abc_ @@ -2882,7 +2835,7 @@ No match !\x{c0}+++++ No match -# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties +# With PCRE2_UCP, non-UTF8 chars that are < 256 still check properties /\b...\B/ucp abc_ @@ -3863,7 +3816,7 @@ No match /[Ss]+/iB,utf ------------------------------------------------------------------ Bra - [Ss\x{17f}\x{17f}]++ + [Ss\x{17f}]++ Ket End ------------------------------------------------------------------ @@ -3881,7 +3834,7 
@@ No match /[S\x{17f}]/iB,utf ------------------------------------------------------------------ Bra - [Ss\x{17f}\x{17f}] + [Ss\x{17f}] Ket End ------------------------------------------------------------------ @@ -3897,7 +3850,7 @@ No match /[\x{17f}s]/iB,utf ------------------------------------------------------------------ Bra - [Ss\x{17f}\x{17f}] + [Ss\x{17f}] Ket End ------------------------------------------------------------------ @@ -3913,7 +3866,7 @@ No match /[\x{4b}\x{6b}]/iB,utf ------------------------------------------------------------------ Bra - [Kk\x{212a}\x{212a}] + [Kk\x{212a}] Ket End ------------------------------------------------------------------ @@ -3936,6 +3889,251 @@ No match # End caseless restrict tests +# TESTS for PCRE2_EXTRA_TURKISH_CASING - again, tests with and without. + +/i/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/i/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/I/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/I/i,utf,turkish_casing + I + 0: I + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + \x{0130} +No match + +/\x{0130}/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + i +No match + I +No match + \x{0131} +No match + +/\x{0130}/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/\x{0131}/i,utf + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + I +No match + \x{0130} +No match + +/\x{0131}/i,utf,turkish_casing + I + 0: I + \x{0131} + 0: \x{131} +\= Expect no match + i +No match + \x{0130} +No match + +/[i]/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/[i]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[\x{0130}]/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + 
i +No match + I +No match + \x{0131} +No match + +/[\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[\x{0120}-\x{0130}]/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + i +No match + I +No match + \x{0131} +No match + +/[\x{0120}-\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[zi]/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/[zi]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[z\x{0130}]/i,utf + \x{0130} + 0: \x{130} +\= Expect no match + i +No match + I +No match + \x{0131} +No match + +/[z\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +/[iI]/i,utf + i + 0: i + I + 0: I +\= Expect no match + \x{0130} +No match + \x{0131} +No match + +/[iI]/i,utf,turkish_casing + i + 0: i + I + 0: I + \x{0130} + 0: \x{130} + \x{0131} + 0: \x{131} + +/[i\x{0130}]/i,utf + i + 0: i + I + 0: I + \x{0130} + 0: \x{130} +\= Expect no match + \x{0131} +No match + +/[i\x{0130}]/i,utf,turkish_casing + i + 0: i + \x{0130} + 0: \x{130} +\= Expect no match + I +No match + \x{0131} +No match + +# End Turkish casing tests + # TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. # DIGITS @@ -4227,4 +4425,128 @@ No match 8: cafe\x{300} 9: cafe +# -------------------------------------------------------------------------- +# Case-independent matching property tests added after changing PCRE2 to be +# compatible with Perl. All three cases (upper, lower, title) conflate. + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/utf + >AbbD< + 0: AbbD + >Abb\x{01c5}< + 0: Abb\x{1c5} +\= Expect no match + >aBBd< +No match + >aB!!< +No match + +/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf + >aB!!< + 0: aB!! 
+\= Expect no match + >AbbD< +No match + >aBBd< +No match + >Abb\x{01c5}< +No match + +/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf + >aB!!< + 0: aB!! +\= Expect no match + >AbbD< +No match + >aBBd< +No match + >Abb\x{01c5}< +No match + +# -------------- + +# EXTENDED CHARACTER CLASSES + +/[\p{Ll}[\p{Nd}]]C/alt_extended_class + aC + 0: aC + 1C + 0: 1C +\= Expect no match + [C +No match + +/[[\p{Ll}][\p{Nd}]]/alt_extended_class + a + 0: a + 1 + 0: 1 +\= Expect no match + [ +No match + ] +No match + +/[[\p{Ll}]||[\p{Nd}]]/alt_extended_class + a + 0: a + 1 + 0: 1 +\= Expect no match + C +No match + +/[[^\p{Ll}][\p{Nd}]]/alt_extended_class + 1 + 0: 1 + A + 0: A +\= Expect no match + a +No match + +/[^[\p{Ll}][\p{Nd}]]/alt_extended_class + A + 0: A +\= Expect no match + a +No match + 1 +No match + +/[^[\p{Ll}]&&[\p{Nd}]]/alt_extended_class + a + 0: a + 1 + 0: 1 + A + 0: A + +/(?[[\p{Ll}]+[\p{Nd}]])/ + a + 0: a + 1 + 0: 1 +\= Expect no match + [ +No match + ] +No match + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +/(?[[\p{Ll}Z]&[\p{Lu}a]])/ + a + 0: a + Z + 0: Z +\= Expect no match + A +No match + z +No match + +# -------------------------------------------------------------------------- + # End of testinput7 diff --git a/testdata/testoutput8-16-2 b/testdata/testoutput8-16-2 index bcb9e17..f2a3815 100644 --- a/testdata/testoutput8-16-2 +++ b/testdata/testoutput8-16-2 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 9 Bra 2 5 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 174 -Memory allocation - code portion : 38 +Memory allocation - code size : 38 ------------------------------------------------------------------ 0 16 Bra 2 7 CBra 1 
@@ -38,8 +36,7 @@ Memory allocation - code portion : 38 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 172 -Memory allocation - code portion : 36 +Memory allocation - code size : 36 ------------------------------------------------------------------ 0 15 Bra 2 6 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 36 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 182 -Memory allocation - code portion : 46 +Memory allocation - code size : 46 ------------------------------------------------------------------ 0 20 Bra 2 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 46 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 2 Bra 2 2 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ 
/x{1,3}+/ -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 7 Bra 2 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 20 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 10 Bra 2 Braposzero @@ -136,8 +126,7 @@ Memory allocation - code portion : 26 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 278 -Memory allocation - code portion : 142 +Memory allocation - code size : 142 ------------------------------------------------------------------ 0 68 Bra 2 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 142 ------------------------------------------------------------------ "8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 1784 -Memory allocation - code portion : 1648 +Memory allocation - code size : 1648 ------------------------------------------------------------------ 0 821 Bra 2 
8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 1648 ------------------------------------------------------------------ "\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 1764 -Memory allocation - code portion : 1628 +Memory allocation - code size : 1628 ------------------------------------------------------------------ 0 811 Bra 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 1628 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 13 Bra 2 9 CBra 1 @@ -196,8 +182,7 @@ Memory 
allocation - code portion : 32 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 176 -Memory allocation - code portion : 40 +Memory allocation - code size : 40 ------------------------------------------------------------------ 0 17 Bra 2 13 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 40 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 242 -Memory allocation - code portion : 54 +Memory allocation - code size : 54 +Memory allocation - data size : 52 ------------------------------------------------------------------ 0 24 Bra 2 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 54 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 218 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 +Memory allocation - data size : 18 ------------------------------------------------------------------ 0 29 Bra 2 18 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 64 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ -Memory allocation - compiled block : 196 -Memory allocation - code portion : 54 +Memory allocation - code size : 54 +Memory allocation - data size : 6 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 54 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 186 -Memory allocation - code portion : 50 +Memory allocation - code size : 50 ------------------------------------------------------------------ 0 22 Bra 2 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 50 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 214 
-Memory allocation - code portion : 78 +Memory allocation - code size : 78 ------------------------------------------------------------------ 0 36 Bra 2 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 78 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -315,8 +297,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 152 -Memory allocation - code portion : 16 +Memory allocation - code size : 16 ------------------------------------------------------------------ 0 5 Bra 2 \x{10000} @@ -335,8 +315,7 @@ Memory allocation - code portion : 16 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 152 -Memory allocation - code portion : 16 +Memory allocation - code size : 16 ------------------------------------------------------------------ 0 5 Bra 2 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 16 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 152 -Memory allocation - code portion : 16 +Memory allocation - code size : 16 ------------------------------------------------------------------ 0 5 Bra 2 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 16 Failed: error 134 at offset 9: character code point 
value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 10 Bra 2 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' 
Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 158 -Memory allocation - code portion : 22 +Memory allocation - code size : 22 ------------------------------------------------------------------ 0 8 Bra 2 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \x{c5b4} Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 158 -Memory allocation - code portion : 22 +Memory allocation - code size : 22 ------------------------------------------------------------------ 0 8 Bra 2 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x{8a9e} Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 190 -Memory allocation - code portion : 54 +Memory allocation - code size : 54 ------------------------------------------------------------------ 0 24 Bra 2 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 54 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 26 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 
26 Failed: error 106 at offset 13: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 27 Bra - 2 [a-c\p{L}\x{660}] + 2 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 27 27 Ket 29 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 
------------------------------------------------------------------ 0 9 Bra 2 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 194 -Memory allocation - code portion : 58 +Memory allocation - code size : 58 ------------------------------------------------------------------ 0 26 Bra - 2 [+\-\p{Nd}]++ + 2 [+\-0-9\p{Nd}]++ 26 26 Ket 28 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 13 Bra 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 32 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 13 Bra 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 32 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 9 Bra 2 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 23 Bra 2 19 CBra 1 5 Brazero 6 13 SCBra 2 9 6 Cond - 11 1 Cond ref + 11 1 Capture ref 13 0 
15 2 Alt 17 8 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 52 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 178 -Memory allocation - code portion : 42 +Memory allocation - code size : 42 ------------------------------------------------------------------ 0 18 Bra 2 14 CBra 1 5 Brazero 6 6 SCond - 8 1 Cond ref + 8 1 Capture ref 10 0 12 2 Alt 14 8 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 42 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -654,8 +606,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory 
allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ @@ -913,36 +860,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 79 Bra 2 70 Once 4 6 Cond - 6 1 Cond ref + 6 1 Capture ref 8 74 Recurse 10 6 Ket 12 6 Cond - 14 1 Cond ref + 14 1 Capture ref 16 74 Recurse 18 6 Ket 20 6 Cond - 22 1 Cond ref + 22 1 Capture ref 24 74 Recurse 26 6 Ket 28 6 Cond - 30 1 Cond ref + 30 1 Capture ref 32 74 Recurse 34 6 Ket 36 6 Cond - 38 1 Cond ref + 38 1 Capture ref 40 74 Recurse 42 6 Ket 44 6 Cond - 46 1 Cond ref + 46 1 Capture ref 48 74 Recurse 50 6 Ket 52 6 Cond - 54 1 Cond ref + 54 1 Capture ref 56 74 Recurse 58 6 Ket 60 10 SBraPos 62 6 SCond - 64 1 Cond ref + 64 1 Capture ref 66 74 Recurse 68 6 Ket 70 10 KetRpos @@ -965,7 +912,7 @@ Subject length lower bound = 0 0 43 Bra 2 34 Once 4 4 Cond - 6 1 Cond ref + 6 1 Capture ref 8 8 Alt 10 a 12 38 Recurse @@ -973,7 +920,7 @@ 
Subject length lower bound = 0 16 12 Ket 18 16 SBraPos 20 4 SCond - 22 1 Cond ref + 22 1 Capture ref 24 8 Alt 26 a 28 38 Recurse diff --git a/testdata/testoutput8-16-3 b/testdata/testoutput8-16-3 index 4ec13ea..d46ceba 100644 --- a/testdata/testoutput8-16-3 +++ b/testdata/testoutput8-16-3 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 12 Bra 3 6 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 32 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 20 Bra 3 8 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 182 -Memory allocation - code portion : 46 +Memory allocation - code size : 46 ------------------------------------------------------------------ 0 19 Bra 3 7 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 46 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 186 -Memory allocation - code portion : 50 +Memory allocation - code size : 50 ------------------------------------------------------------------ 0 21 Bra 3 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 50 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 3 Bra 3 3 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - compiled 
block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 8 Bra 3 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 170 -Memory allocation - code portion : 34 +Memory allocation - code size : 34 ------------------------------------------------------------------ 0 13 Bra 3 Braposzero @@ -136,8 +126,7 @@ Memory allocation - code portion : 34 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 302 -Memory allocation - code portion : 166 +Memory allocation - code size : 166 ------------------------------------------------------------------ 0 79 Bra 3 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 166 ------------------------------------------------------------------ 
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 1788 -Memory allocation - code portion : 1652 +Memory allocation - code size : 1652 ------------------------------------------------------------------ 0 822 Bra 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 1652 ------------------------------------------------------------------ "\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 1768 -Memory allocation - code portion : 1632 +Memory allocation - code size : 1632 ------------------------------------------------------------------ 0 812 Bra 3 
$<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 1632 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 178 -Memory allocation - code portion : 42 +Memory allocation - code size : 42 ------------------------------------------------------------------ 0 17 Bra 3 11 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 42 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 190 -Memory allocation - code portion : 54 +Memory allocation - code size : 54 ------------------------------------------------------------------ 0 23 Bra 3 17 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 54 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 256 -Memory allocation - code portion : 68 +Memory allocation - code size : 68 +Memory allocation - data size : 52 ------------------------------------------------------------------ 0 30 Bra 3 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 68 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 238 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 +Memory allocation - data size : 18 ------------------------------------------------------------------ 0 38 Bra 3 23 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 84 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ -Memory 
allocation - compiled block : 206 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 +Memory allocation - data size : 6 ------------------------------------------------------------------ 0 28 Bra 3 6 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 64 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 198 -Memory allocation - code portion : 62 +Memory allocation - code size : 62 ------------------------------------------------------------------ 0 27 Bra 3 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 62 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 242 -Memory allocation - code portion : 106 +Memory allocation - code size : 106 ------------------------------------------------------------------ 0 49 Bra 3 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 106 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{100} @@ -315,8 +297,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 6 Bra 3 \x{10000} @@ -335,8 +315,7 @@ Memory allocation - 
code portion : 20 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 6 Bra 3 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 20 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 6 Bra 3 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 20 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 
------------------------------------------------------------------ 0 5 Bra 3 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 9 Bra 3 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \x{c5b4} Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 9 Bra 3 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x{8a9e} Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 26 Bra 3 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory 
allocation - code size : 32 ------------------------------------------------------------------ 0 12 Bra 3 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 32 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 12 Bra 3 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 32 Failed: error 106 at offset 13: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 
202 -Memory allocation - code portion : 66 +Memory allocation - code size : 66 ------------------------------------------------------------------ 0 29 Bra - 3 [a-c\p{L}\x{660}] + 3 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 29 29 Ket 32 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 200 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 ------------------------------------------------------------------ 0 28 Bra - 3 [+\-\p{Nd}]++ + 3 [+\-0-9\p{Nd}]++ 28 28 Ket 31 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 172 -Memory allocation - code portion : 36 +Memory allocation - code size : 36 ------------------------------------------------------------------ 0 14 Bra 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 36 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 172 -Memory allocation - code portion : 36 +Memory allocation - code size : 36 ------------------------------------------------------------------ 0 14 Bra 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 36 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 
------------------------------------------------------------------ 0 11 Bra 3 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 206 -Memory allocation - code portion : 70 +Memory allocation - code size : 70 ------------------------------------------------------------------ 0 31 Bra 3 25 CBra 1 7 Brazero 8 17 SCBra 2 12 7 Cond - 15 1 Cond ref + 15 1 Capture ref 17 0 19 3 Alt 22 10 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 70 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 192 -Memory allocation - code portion : 56 +Memory allocation - code size : 56 ------------------------------------------------------------------ 0 24 Bra 3 18 CBra 1 7 Brazero 8 7 SCond - 11 1 Cond ref + 11 1 Capture ref 13 0 15 3 Alt 18 10 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 56 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{aa} @@ -654,8 +606,7 @@ Memory 
allocation - code portion : 18 ------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^a] + 3 [^a] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^a] + 3 [^a] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^\x{aa}] + 3 [^\x{aa}] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^\x{aa}] + 3 [^\x{aa}] (not) 5 5 Ket 8 End ------------------------------------------------------------------ @@ -912,36 +859,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 110 Bra 3 97 Once 6 8 Cond - 9 1 Cond ref + 9 1 Capture ref 11 103 Recurse 14 8 Ket 17 8 Cond - 20 1 Cond ref + 20 1 Capture ref 22 103 Recurse 25 8 Ket 28 8 Cond - 31 1 Cond ref + 31 1 Capture ref 33 103 Recurse 36 
8 Ket 39 8 Cond - 42 1 Cond ref + 42 1 Capture ref 44 103 Recurse 47 8 Ket 50 8 Cond - 53 1 Cond ref + 53 1 Capture ref 55 103 Recurse 58 8 Ket 61 8 Cond - 64 1 Cond ref + 64 1 Capture ref 66 103 Recurse 69 8 Ket 72 8 Cond - 75 1 Cond ref + 75 1 Capture ref 77 103 Recurse 80 8 Ket 83 14 SBraPos 86 8 SCond - 89 1 Cond ref + 89 1 Capture ref 91 103 Recurse 94 8 Ket 97 14 KetRpos @@ -964,7 +911,7 @@ Subject length lower bound = 0 0 58 Bra 3 45 Once 6 5 Cond - 9 1 Cond ref + 9 1 Capture ref 11 10 Alt 14 a 16 51 Recurse @@ -972,7 +919,7 @@ Subject length lower bound = 0 21 15 Ket 24 21 SBraPos 27 5 SCond - 30 1 Cond ref + 30 1 Capture ref 32 10 Alt 35 a 37 51 Recurse diff --git a/testdata/testoutput8-16-4 b/testdata/testoutput8-16-4 index 4ec13ea..d46ceba 100644 --- a/testdata/testoutput8-16-4 +++ b/testdata/testoutput8-16-4 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 12 Bra 3 6 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 32 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 20 Bra 3 8 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 182 -Memory allocation - code portion : 46 +Memory allocation - code size : 46 ------------------------------------------------------------------ 0 19 Bra 3 7 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 46 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 186 -Memory allocation - code portion : 50 +Memory 
allocation - code size : 50 ------------------------------------------------------------------ 0 21 Bra 3 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 50 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 3 Bra 3 3 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 8 Bra 3 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 170 -Memory allocation - code portion : 34 +Memory allocation - code size : 34 ------------------------------------------------------------------ 0 13 Bra 3 Braposzero @@ -136,8 +126,7 @@ 
Memory allocation - code portion : 34 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 302 -Memory allocation - code portion : 166 +Memory allocation - code size : 166 ------------------------------------------------------------------ 0 79 Bra 3 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 166 ------------------------------------------------------------------ "8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 1788 -Memory allocation - code portion : 1652 +Memory allocation - code size : 1652 ------------------------------------------------------------------ 0 822 Bra 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 1652 ------------------------------------------------------------------ 
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 1768 -Memory allocation - code portion : 1632 +Memory allocation - code size : 1632 ------------------------------------------------------------------ 0 812 Bra 3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 1632 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 178 -Memory allocation - code portion : 42 +Memory allocation - code size : 42 ------------------------------------------------------------------ 0 17 Bra 3 11 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 42 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 190 -Memory allocation - code portion : 54 +Memory allocation - code size : 54 ------------------------------------------------------------------ 0 23 Bra 3 17 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 54 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 256 -Memory allocation - code 
portion : 68 +Memory allocation - code size : 68 +Memory allocation - data size : 52 ------------------------------------------------------------------ 0 30 Bra 3 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 68 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 238 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 +Memory allocation - data size : 18 ------------------------------------------------------------------ 0 38 Bra 3 23 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 84 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ -Memory allocation - compiled block : 206 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 +Memory allocation - data size : 6 ------------------------------------------------------------------ 0 28 Bra 3 6 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 64 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 198 -Memory allocation - code portion : 62 +Memory allocation - code size : 62 ------------------------------------------------------------------ 0 27 Bra 3 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 62 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 242 -Memory allocation - code portion : 106 +Memory allocation - code size : 106 ------------------------------------------------------------------ 0 49 Bra 3 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 106 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{100} @@ -315,8 
+297,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 6 Bra 3 \x{10000} @@ -335,8 +315,7 @@ Memory allocation - code portion : 20 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 6 Bra 3 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 20 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 6 Bra 3 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 20 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 
------------------------------------------------------------------ 0 5 Bra 3 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' 
Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 9 Bra 3 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \x{c5b4} Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 9 Bra 3 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x{8a9e} Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 26 Bra 3 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 12 Bra 3 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 32 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 12 Bra 3 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 
32 Failed: error 106 at offset 13: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 202 -Memory allocation - code portion : 66 +Memory allocation - code size : 66 ------------------------------------------------------------------ 0 29 Bra - 3 [a-c\p{L}\x{660}] + 3 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 29 29 Ket 32 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 
------------------------------------------------------------------ 0 11 Bra 3 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 200 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 ------------------------------------------------------------------ 0 28 Bra - 3 [+\-\p{Nd}]++ + 3 [+\-0-9\p{Nd}]++ 28 28 Ket 31 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 172 -Memory allocation - code portion : 36 +Memory allocation - code size : 36 ------------------------------------------------------------------ 0 14 Bra 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 36 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 172 -Memory allocation - code portion : 36 +Memory allocation - code size : 36 ------------------------------------------------------------------ 0 14 Bra 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 36 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 11 Bra 3 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 206 -Memory allocation - code portion : 70 +Memory allocation - code size : 70 ------------------------------------------------------------------ 0 31 Bra 3 25 CBra 1 7 Brazero 8 17 SCBra 2 12 7 Cond - 15 1 Cond ref + 15 1 Capture ref 17 
0 19 3 Alt 22 10 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 70 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 192 -Memory allocation - code portion : 56 +Memory allocation - code size : 56 ------------------------------------------------------------------ 0 24 Bra 3 18 CBra 1 7 Brazero 8 7 SCond - 11 1 Cond ref + 11 1 Capture ref 13 0 15 3 Alt 18 10 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 56 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{aa} @@ -654,8 +606,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra 3 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 
+Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^a] + 3 [^a] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^a] + 3 [^a] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^\x{aa}] + 3 [^\x{aa}] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 5 Bra - 3 [^\x{aa}] + 3 [^\x{aa}] (not) 5 5 Ket 8 End ------------------------------------------------------------------ @@ -912,36 +859,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 110 Bra 3 97 Once 6 8 Cond - 9 1 Cond ref + 9 1 Capture ref 11 103 Recurse 14 8 Ket 17 8 Cond - 20 1 Cond ref + 20 1 Capture ref 22 103 Recurse 25 8 Ket 28 8 Cond - 31 1 Cond ref + 31 1 Capture ref 33 103 Recurse 36 8 Ket 39 8 Cond - 42 1 Cond ref + 42 1 Capture ref 44 103 Recurse 47 8 Ket 50 8 Cond - 53 1 Cond ref + 53 1 Capture ref 55 103 Recurse 58 8 Ket 61 8 Cond - 64 1 Cond ref + 64 1 Capture ref 66 103 Recurse 69 8 Ket 72 8 Cond - 75 1 Cond ref + 75 1 Capture ref 77 103 Recurse 80 8 Ket 83 14 SBraPos 86 8 SCond - 89 1 Cond ref + 89 1 Capture ref 91 103 Recurse 94 8 Ket 97 14 KetRpos @@ -964,7 +911,7 @@ Subject length lower bound = 0 0 58 Bra 3 45 Once 6 5 Cond - 9 1 Cond ref + 9 1 Capture ref 11 10 Alt 14 a 16 51 Recurse @@ 
-972,7 +919,7 @@ Subject length lower bound = 0 21 15 Ket 24 21 SBraPos 27 5 SCond - 30 1 Cond ref + 30 1 Capture ref 32 10 Alt 35 a 37 51 Recurse diff --git a/testdata/testoutput8-32-2 b/testdata/testoutput8-32-2 index d76f3aa..e9865c8 100644 --- a/testdata/testoutput8-32-2 +++ b/testdata/testoutput8-32-2 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 5 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 212 -Memory allocation - code portion : 76 +Memory allocation - code size : 76 ------------------------------------------------------------------ 0 16 Bra 2 7 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 76 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 208 -Memory allocation - code portion : 72 +Memory allocation - code size : 72 ------------------------------------------------------------------ 0 15 Bra 2 6 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 72 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 2 Bra 2 2 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory 
allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 176 -Memory allocation - code portion : 40 +Memory allocation - code size : 40 ------------------------------------------------------------------ 0 7 Bra 2 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 40 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 Braposzero @@ -136,8 +126,7 @@ Memory allocation - code portion : 52 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 356 -Memory allocation - code portion : 220 +Memory allocation - code size : 220 ------------------------------------------------------------------ 0 52 Bra 2 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 220 ------------------------------------------------------------------ 
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 3432 -Memory allocation - code portion : 3296 +Memory allocation - code size : 3296 ------------------------------------------------------------------ 0 821 Bra 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 3296 ------------------------------------------------------------------ "\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 3392 -Memory allocation - code portion : 3256 +Memory allocation - code size : 3256 ------------------------------------------------------------------ 0 811 Bra 2 
$<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 3256 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 200 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 ------------------------------------------------------------------ 0 13 Bra 2 9 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 64 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 216 -Memory allocation - code portion : 80 +Memory allocation - code size : 80 ------------------------------------------------------------------ 0 17 Bra 2 13 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 80 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 348 -Memory allocation - code portion : 108 +Memory allocation - code size : 108 +Memory allocation - data size : 104 ------------------------------------------------------------------ 0 24 Bra 2 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 108 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 300 -Memory allocation - code portion : 128 +Memory allocation - code size : 128 +Memory allocation - data size : 36 ------------------------------------------------------------------ 0 29 Bra 2 18 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 128 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ 
-Memory allocation - compiled block : 256 -Memory allocation - code portion : 108 +Memory allocation - code size : 108 +Memory allocation - data size : 12 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 108 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 236 -Memory allocation - code portion : 100 +Memory allocation - code size : 100 ------------------------------------------------------------------ 0 22 Bra 2 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 100 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 292 -Memory allocation - code portion : 156 +Memory allocation - code size : 156 ------------------------------------------------------------------ 0 36 Bra 2 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 156 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -315,8 +297,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{10000} @@ -335,8 +315,7 @@ 
Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 28 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 
------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 180 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 ------------------------------------------------------------------ 0 8 Bra 2 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \x{c5b4} Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 180 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 ------------------------------------------------------------------ 0 8 Bra 2 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x{8a9e} Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 212 -Memory allocation - code portion : 76 +Memory allocation - code size : 76 ------------------------------------------------------------------ 0 16 Bra 2 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 76 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory 
allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 52 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 52 Failed: error 106 at offset 13: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 224 
-Memory allocation - code portion : 88 +Memory allocation - code size : 88 ------------------------------------------------------------------ 0 19 Bra - 2 [a-c\p{L}\x{660}] + 2 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 19 19 Ket 21 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 220 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 ------------------------------------------------------------------ 0 18 Bra - 2 [+\-\p{Nd}]++ + 2 [+\-0-9\p{Nd}]++ 18 18 Ket 20 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 
9 Bra 2 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 240 -Memory allocation - code portion : 104 +Memory allocation - code size : 104 ------------------------------------------------------------------ 0 23 Bra 2 19 CBra 1 5 Brazero 6 13 SCBra 2 9 6 Cond - 11 1 Cond ref + 11 1 Capture ref 13 0 15 2 Alt 17 8 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 104 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 220 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 ------------------------------------------------------------------ 0 18 Bra 2 14 CBra 1 5 Brazero 6 6 SCond - 8 1 Cond ref + 8 1 Capture ref 10 0 12 2 Alt 14 8 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 84 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -654,8 +606,7 @@ Memory allocation - code portion : 28 
------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ @@ -912,36 +859,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 79 Bra 2 70 Once 4 6 Cond - 6 1 Cond ref + 6 1 Capture ref 8 74 Recurse 10 6 Ket 12 6 Cond - 14 1 Cond ref + 14 1 Capture ref 16 74 Recurse 18 6 Ket 20 6 Cond - 22 1 Cond ref + 22 1 Capture ref 24 74 Recurse 26 6 Ket 28 6 Cond - 30 1 Cond ref + 30 
1 Capture ref 32 74 Recurse 34 6 Ket 36 6 Cond - 38 1 Cond ref + 38 1 Capture ref 40 74 Recurse 42 6 Ket 44 6 Cond - 46 1 Cond ref + 46 1 Capture ref 48 74 Recurse 50 6 Ket 52 6 Cond - 54 1 Cond ref + 54 1 Capture ref 56 74 Recurse 58 6 Ket 60 10 SBraPos 62 6 SCond - 64 1 Cond ref + 64 1 Capture ref 66 74 Recurse 68 6 Ket 70 10 KetRpos @@ -964,7 +911,7 @@ Subject length lower bound = 0 0 43 Bra 2 34 Once 4 4 Cond - 6 1 Cond ref + 6 1 Capture ref 8 8 Alt 10 a 12 38 Recurse @@ -972,7 +919,7 @@ Subject length lower bound = 0 16 12 Ket 18 16 SBraPos 20 4 SCond - 22 1 Cond ref + 22 1 Capture ref 24 8 Alt 26 a 28 38 Recurse diff --git a/testdata/testoutput8-32-3 b/testdata/testoutput8-32-3 index d76f3aa..e9865c8 100644 --- a/testdata/testoutput8-32-3 +++ b/testdata/testoutput8-32-3 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 5 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 212 -Memory allocation - code portion : 76 +Memory allocation - code size : 76 ------------------------------------------------------------------ 0 16 Bra 2 7 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 76 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 208 -Memory allocation - code portion : 72 +Memory allocation - code size : 72 ------------------------------------------------------------------ 0 15 Bra 2 6 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 72 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 
------------------------------------------------------------------ 0 12 Bra 2 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 2 Bra 2 2 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 176 -Memory allocation - code portion : 40 +Memory allocation - code size : 40 ------------------------------------------------------------------ 0 7 Bra 2 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 40 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 Braposzero @@ -136,8 +126,7 @@ Memory allocation - code 
portion : 52 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 356 -Memory allocation - code portion : 220 +Memory allocation - code size : 220 ------------------------------------------------------------------ 0 52 Bra 2 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 220 ------------------------------------------------------------------ "8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 3432 -Memory allocation - code portion : 3296 +Memory allocation - code size : 3296 ------------------------------------------------------------------ 0 821 Bra 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 3296 ------------------------------------------------------------------ 
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 3392 -Memory allocation - code portion : 3256 +Memory allocation - code size : 3256 ------------------------------------------------------------------ 0 811 Bra 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 3256 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 200 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 ------------------------------------------------------------------ 0 13 Bra 2 9 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 64 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 216 -Memory allocation - code portion : 80 +Memory allocation - code size : 80 ------------------------------------------------------------------ 0 17 Bra 2 13 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 80 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 348 -Memory allocation - code 
portion : 108 +Memory allocation - code size : 108 +Memory allocation - data size : 104 ------------------------------------------------------------------ 0 24 Bra 2 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 108 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 300 -Memory allocation - code portion : 128 +Memory allocation - code size : 128 +Memory allocation - data size : 36 ------------------------------------------------------------------ 0 29 Bra 2 18 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 128 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ -Memory allocation - compiled block : 256 -Memory allocation - code portion : 108 +Memory allocation - code size : 108 +Memory allocation - data size : 12 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 108 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 236 -Memory allocation - code portion : 100 +Memory allocation - code size : 100 ------------------------------------------------------------------ 0 22 Bra 2 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 100 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 292 -Memory allocation - code portion : 156 +Memory allocation - code size : 156 ------------------------------------------------------------------ 0 36 Bra 2 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 156 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} 
@@ -315,8 +297,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{10000} @@ -335,8 +315,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 28 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 
------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' 
Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 180 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 ------------------------------------------------------------------ 0 8 Bra 2 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \x{c5b4} Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 180 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 ------------------------------------------------------------------ 0 8 Bra 2 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x{8a9e} Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 212 -Memory allocation - code portion : 76 +Memory allocation - code size : 76 ------------------------------------------------------------------ 0 16 Bra 2 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 76 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 52 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 
52 Failed: error 106 at offset 13: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 224 -Memory allocation - code portion : 88 +Memory allocation - code size : 88 ------------------------------------------------------------------ 0 19 Bra - 2 [a-c\p{L}\x{660}] + 2 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 19 19 Ket 21 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 
------------------------------------------------------------------ 0 9 Bra 2 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 220 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 ------------------------------------------------------------------ 0 18 Bra - 2 [+\-\p{Nd}]++ + 2 [+\-0-9\p{Nd}]++ 18 18 Ket 20 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 240 -Memory allocation - code portion : 104 +Memory allocation - code size : 104 ------------------------------------------------------------------ 0 23 Bra 2 19 CBra 1 5 Brazero 6 13 SCBra 2 9 6 Cond - 11 1 Cond ref + 11 1 Capture ref 13 
0 15 2 Alt 17 8 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 104 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 220 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 ------------------------------------------------------------------ 0 18 Bra 2 14 CBra 1 5 Brazero 6 6 SCond - 8 1 Cond ref + 8 1 Capture ref 10 0 12 2 Alt 14 8 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 84 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -654,8 +606,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 
+Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ @@ -912,36 +859,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 79 Bra 2 70 Once 4 6 Cond - 6 1 Cond ref + 6 1 Capture ref 8 74 Recurse 10 6 Ket 12 6 Cond - 14 1 Cond ref + 14 1 Capture ref 16 74 Recurse 18 6 Ket 20 6 Cond - 22 1 Cond ref + 22 1 Capture ref 24 74 Recurse 26 6 Ket 28 6 Cond - 30 1 Cond ref + 30 1 Capture ref 32 74 Recurse 34 6 Ket 36 6 Cond - 38 1 Cond ref + 38 1 Capture ref 40 74 Recurse 42 6 Ket 44 6 Cond - 46 1 Cond ref + 46 1 Capture ref 48 74 Recurse 50 6 Ket 52 6 Cond - 54 1 Cond ref + 54 1 Capture ref 56 74 Recurse 58 6 Ket 60 10 SBraPos 62 6 SCond - 64 1 Cond ref + 64 1 Capture ref 66 74 Recurse 68 6 Ket 70 10 KetRpos @@ -964,7 +911,7 @@ Subject length lower bound = 0 0 43 Bra 2 34 Once 4 4 Cond - 6 1 Cond ref + 6 1 Capture ref 8 8 Alt 10 a 12 38 Recurse @@ -972,7 +919,7 
@@ Subject length lower bound = 0 16 12 Ket 18 16 SBraPos 20 4 SCond - 22 1 Cond ref + 22 1 Capture ref 24 8 Alt 26 a 28 38 Recurse diff --git a/testdata/testoutput8-32-4 b/testdata/testoutput8-32-4 index d76f3aa..e9865c8 100644 --- a/testdata/testoutput8-32-4 +++ b/testdata/testoutput8-32-4 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 5 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 212 -Memory allocation - code portion : 76 +Memory allocation - code size : 76 ------------------------------------------------------------------ 0 16 Bra 2 7 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 76 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 208 -Memory allocation - code portion : 72 +Memory allocation - code size : 72 ------------------------------------------------------------------ 0 15 Bra 2 6 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 72 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 2 Bra 2 2 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - compiled 
block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 176 -Memory allocation - code portion : 40 +Memory allocation - code size : 40 ------------------------------------------------------------------ 0 7 Bra 2 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 40 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 Braposzero @@ -136,8 +126,7 @@ Memory allocation - code portion : 52 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 356 -Memory allocation - code portion : 220 +Memory allocation - code size : 220 ------------------------------------------------------------------ 0 52 Bra 2 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 220 ------------------------------------------------------------------ 
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 3432 -Memory allocation - code portion : 3296 +Memory allocation - code size : 3296 ------------------------------------------------------------------ 0 821 Bra 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 3296 ------------------------------------------------------------------ "\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 3392 -Memory allocation - code portion : 3256 +Memory allocation - code size : 3256 ------------------------------------------------------------------ 0 811 Bra 2 
$<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 3256 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 200 -Memory allocation - code portion : 64 +Memory allocation - code size : 64 ------------------------------------------------------------------ 0 13 Bra 2 9 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 64 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 216 -Memory allocation - code portion : 80 +Memory allocation - code size : 80 ------------------------------------------------------------------ 0 17 Bra 2 13 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 80 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 348 -Memory allocation - code portion : 108 +Memory allocation - code size : 108 +Memory allocation - data size : 104 ------------------------------------------------------------------ 0 24 Bra 2 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 108 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 300 -Memory allocation - code portion : 128 +Memory allocation - code size : 128 +Memory allocation - data size : 36 ------------------------------------------------------------------ 0 29 Bra 2 18 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 128 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ 
-Memory allocation - compiled block : 256 -Memory allocation - code portion : 108 +Memory allocation - code size : 108 +Memory allocation - data size : 12 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 108 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 236 -Memory allocation - code portion : 100 +Memory allocation - code size : 100 ------------------------------------------------------------------ 0 22 Bra 2 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 100 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 292 -Memory allocation - code portion : 156 +Memory allocation - code size : 156 ------------------------------------------------------------------ 0 36 Bra 2 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 156 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -315,8 +297,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{10000} @@ -335,8 +315,7 @@ 
Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 28 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 
------------------------------------------------------------------ 0 4 Bra 2 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 180 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 ------------------------------------------------------------------ 0 8 Bra 2 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \x{c5b4} Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 180 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 ------------------------------------------------------------------ 0 8 Bra 2 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x{8a9e} Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 212 -Memory allocation - code portion : 76 +Memory allocation - code size : 76 ------------------------------------------------------------------ 0 16 Bra 2 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 76 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory 
allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 52 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 188 -Memory allocation - code portion : 52 +Memory allocation - code size : 52 ------------------------------------------------------------------ 0 10 Bra 2 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 52 Failed: error 106 at offset 13: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 224 
-Memory allocation - code portion : 88 +Memory allocation - code size : 88 ------------------------------------------------------------------ 0 19 Bra - 2 [a-c\p{L}\x{660}] + 2 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 19 19 Ket 21 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 9 Bra 2 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 220 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 ------------------------------------------------------------------ 0 18 Bra - 2 [+\-\p{Nd}]++ + 2 [+\-0-9\p{Nd}]++ 18 18 Ket 20 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 196 -Memory allocation - code portion : 60 +Memory allocation - code size : 60 ------------------------------------------------------------------ 0 12 Bra 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 60 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 
9 Bra 2 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 48 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 240 -Memory allocation - code portion : 104 +Memory allocation - code size : 104 ------------------------------------------------------------------ 0 23 Bra 2 19 CBra 1 5 Brazero 6 13 SCBra 2 9 6 Cond - 11 1 Cond ref + 11 1 Capture ref 13 0 15 2 Alt 17 8 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 104 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 220 -Memory allocation - code portion : 84 +Memory allocation - code size : 84 ------------------------------------------------------------------ 0 18 Bra 2 14 CBra 1 5 Brazero 6 6 SCond - 8 1 Cond ref + 8 1 Capture ref 10 0 12 2 Alt 14 8 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 84 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -654,8 +606,7 @@ Memory allocation - code portion : 28 
------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra 2 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^a] + 2 [^a] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 4 Bra - 2 [^\x{aa}] + 2 [^\x{aa}] (not) 4 4 Ket 6 End ------------------------------------------------------------------ @@ -912,36 +859,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 79 Bra 2 70 Once 4 6 Cond - 6 1 Cond ref + 6 1 Capture ref 8 74 Recurse 10 6 Ket 12 6 Cond - 14 1 Cond ref + 14 1 Capture ref 16 74 Recurse 18 6 Ket 20 6 Cond - 22 1 Cond ref + 22 1 Capture ref 24 74 Recurse 26 6 Ket 28 6 Cond - 30 1 Cond ref + 30 
1 Capture ref 32 74 Recurse 34 6 Ket 36 6 Cond - 38 1 Cond ref + 38 1 Capture ref 40 74 Recurse 42 6 Ket 44 6 Cond - 46 1 Cond ref + 46 1 Capture ref 48 74 Recurse 50 6 Ket 52 6 Cond - 54 1 Cond ref + 54 1 Capture ref 56 74 Recurse 58 6 Ket 60 10 SBraPos 62 6 SCond - 64 1 Cond ref + 64 1 Capture ref 66 74 Recurse 68 6 Ket 70 10 KetRpos @@ -964,7 +911,7 @@ Subject length lower bound = 0 0 43 Bra 2 34 Once 4 4 Cond - 6 1 Cond ref + 6 1 Capture ref 8 8 Alt 10 a 12 38 Recurse @@ -972,7 +919,7 @@ Subject length lower bound = 0 16 12 Ket 18 16 SBraPos 20 4 SCond - 22 1 Cond ref + 22 1 Capture ref 24 8 Alt 26 a 28 38 Recurse diff --git a/testdata/testoutput8-8-2 b/testdata/testoutput8-8-2 index f3811d9..0ebc4d0 100644 --- a/testdata/testoutput8-8-2 +++ b/testdata/testoutput8-8-2 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 153 -Memory allocation - code portion : 17 +Memory allocation - code size : 17 ------------------------------------------------------------------ 0 13 Bra 3 7 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 17 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 161 -Memory allocation - code portion : 25 +Memory allocation - code size : 25 ------------------------------------------------------------------ 0 21 Bra 3 9 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 25 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 159 -Memory allocation - code portion : 23 +Memory allocation - code size : 23 ------------------------------------------------------------------ 0 19 Bra 3 7 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 23 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 177 -Memory allocation - code portion : 41 +Memory allocation - code size : 41 
------------------------------------------------------------------ 0 37 Bra 3 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 41 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 143 -Memory allocation - code portion : 7 +Memory allocation - code size : 7 ------------------------------------------------------------------ 0 3 Bra 3 3 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra 3 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 9 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra 3 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 9 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 9 Bra 3 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 13 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 14 Bra 3 Braposzero @@ -136,8 +126,7 @@ Memory allocation - code portion : 18 
------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 256 -Memory allocation - code portion : 120 +Memory allocation - code size : 120 ------------------------------------------------------------------ 0 116 Bra 3 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 120 ------------------------------------------------------------------ "8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 962 -Memory allocation - code portion : 826 +Memory allocation - code size : 826 ------------------------------------------------------------------ 0 822 Bra 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 826 ------------------------------------------------------------------ 
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 952 -Memory allocation - code portion : 816 +Memory allocation - code size : 816 ------------------------------------------------------------------ 0 812 Bra 3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 816 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 158 -Memory allocation - code portion : 22 +Memory allocation - code size : 22 ------------------------------------------------------------------ 0 18 Bra 3 12 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 22 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 24 Bra 3 18 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 200 -Memory allocation - code portion 
: 36 +Memory allocation - code size : 36 +Memory allocation - data size : 28 ------------------------------------------------------------------ 0 32 Bra 3 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 36 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 193 -Memory allocation - code portion : 45 +Memory allocation - code size : 45 +Memory allocation - data size : 12 ------------------------------------------------------------------ 0 41 Bra 3 25 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 45 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ -Memory allocation - compiled block : 174 -Memory allocation - code portion : 34 +Memory allocation - code size : 34 +Memory allocation - data size : 4 ------------------------------------------------------------------ 0 30 Bra 3 7 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 34 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 167 -Memory allocation - code portion : 31 +Memory allocation - code size : 31 ------------------------------------------------------------------ 0 27 Bra 3 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 31 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 189 -Memory allocation - code portion : 53 +Memory allocation - code size : 53 ------------------------------------------------------------------ 0 49 Bra 3 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 53 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 6 Bra 3 \x{100} @@ -315,8 +297,7 @@ 
Memory allocation - code portion : 10 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 7 Bra 3 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 11 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 8 Bra 3 \x{10000} @@ -335,8 +315,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 8 Bra 3 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 8 Bra 3 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 12 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 6 Bra 3 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 10 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 
------------------------------------------------------------------ 0 6 Bra 3 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 10 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 6 Bra 3 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 10 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 6 Bra 3 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 10 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 14 Bra 3 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' 
Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 155 -Memory allocation - code portion : 19 +Memory allocation - code size : 19 ------------------------------------------------------------------ 0 15 Bra 3 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \xb4 Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 155 -Memory allocation - code portion : 19 +Memory allocation - code size : 19 ------------------------------------------------------------------ 0 15 Bra 3 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x9e Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 6 Bra 3 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 10 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 183 -Memory allocation - code portion : 47 +Memory allocation - code size : 47 ------------------------------------------------------------------ 0 43 Bra 3 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 47 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 14 Bra 3 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 14 Bra 3 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 18 
Failed: error 106 at offset 15: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 151 -Memory allocation - code portion : 15 +Memory allocation - code size : 15 ------------------------------------------------------------------ 0 11 Bra 3 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 15 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 151 -Memory allocation - code portion : 15 +Memory allocation - code size : 15 ------------------------------------------------------------------ 0 11 Bra 3 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 15 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 151 -Memory allocation - code portion : 15 +Memory allocation - code size : 15 ------------------------------------------------------------------ 0 11 Bra 3 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 15 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 151 -Memory allocation - code portion : 15 +Memory allocation - code size : 15 ------------------------------------------------------------------ 0 11 Bra 3 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 15 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 186 -Memory allocation - code portion : 50 +Memory allocation - code size : 50 ------------------------------------------------------------------ 0 46 Bra - 3 [a-c\p{L}\x{660}] + 3 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 46 46 Ket 49 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 151 -Memory allocation - code portion : 15 +Memory allocation - code size : 15 
------------------------------------------------------------------ 0 11 Bra 3 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 15 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 184 -Memory allocation - code portion : 48 +Memory allocation - code size : 48 ------------------------------------------------------------------ 0 44 Bra - 3 [+\-\p{Nd}]++ + 3 [+\-0-9\p{Nd}]++ 44 44 Ket 47 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 161 -Memory allocation - code portion : 25 +Memory allocation - code size : 25 ------------------------------------------------------------------ 0 21 Bra 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 25 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 161 -Memory allocation - code portion : 25 +Memory allocation - code size : 25 ------------------------------------------------------------------ 0 21 Bra 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 25 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 153 -Memory allocation - code portion : 17 +Memory allocation - code size : 17 ------------------------------------------------------------------ 0 13 Bra 3 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 17 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 174 -Memory allocation - code portion : 38 +Memory allocation - code size : 38 ------------------------------------------------------------------ 0 34 Bra 3 28 CBra 1 8 Brazero 9 19 SCBra 2 14 8 Cond - 17 1 Cond ref + 17 1 Capture ref 20 
0 22 3 Alt 25 11 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 38 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 26 Bra 3 20 CBra 1 8 Brazero 9 8 SCond - 12 1 Cond ref + 12 1 Capture ref 15 0 17 3 Alt 20 11 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 9 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra 3 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 9 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra 3 \x{aa} @@ -654,8 +606,7 @@ Memory allocation - code portion : 9 ------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 6 Bra 3 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 10 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory 
allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra - 3 [^a] + 3 [^a] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra - 3 [^a] + 3 [^a] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 5 Bra - 3 [^\x{aa}] + 3 [^\x{aa}] (not) 5 5 Ket 8 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 146 -Memory allocation - code portion : 10 +Memory allocation - code size : 10 ------------------------------------------------------------------ 0 6 Bra - 3 [^\x{aa}] + 3 [^\x{aa}] (not) 6 6 Ket 9 End ------------------------------------------------------------------ @@ -913,36 +860,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 119 Bra 3 105 Once 6 9 Cond - 9 1 Cond ref + 9 1 Capture ref 12 111 Recurse 15 9 Ket 18 9 Cond - 21 1 Cond ref + 21 1 Capture ref 24 111 Recurse 27 9 Ket 30 9 Cond - 33 1 Cond ref + 33 1 Capture ref 36 111 Recurse 39 9 Ket 42 9 Cond - 45 1 Cond ref + 45 1 Capture ref 48 111 Recurse 51 9 Ket 54 9 Cond - 57 1 Cond ref + 57 1 Capture ref 60 111 Recurse 63 9 Ket 66 9 Cond - 69 1 Cond ref + 69 1 Capture ref 72 111 Recurse 75 9 Ket 78 9 Cond - 81 1 Cond ref + 81 1 Capture ref 84 111 Recurse 87 9 Ket 90 15 SBraPos 93 9 SCond - 96 1 Cond ref + 96 1 Capture ref 99 111 Recurse 102 9 Ket 105 15 KetRpos @@ -965,7 +912,7 @@ Subject length lower bound = 0 0 61 Bra 3 47 Once 6 6 Cond - 9 1 Cond ref + 9 1 Capture ref 12 10 Alt 15 a 17 53 Recurse @@ -973,7 
+920,7 @@ Subject length lower bound = 0 22 16 Ket 25 22 SBraPos 28 6 SCond - 31 1 Cond ref + 31 1 Capture ref 34 10 Alt 37 a 39 53 Recurse diff --git a/testdata/testoutput8-8-3 b/testdata/testoutput8-8-3 index 48e0b8a..04b3a3e 100644 --- a/testdata/testoutput8-8-3 +++ b/testdata/testoutput8-8-3 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 16 Bra 4 8 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 21 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 166 -Memory allocation - code portion : 30 +Memory allocation - code size : 30 ------------------------------------------------------------------ 0 25 Bra 4 10 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 30 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 164 -Memory allocation - code portion : 28 +Memory allocation - code size : 28 ------------------------------------------------------------------ 0 23 Bra 4 8 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 28 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 179 -Memory allocation - code portion : 43 +Memory allocation - code size : 43 ------------------------------------------------------------------ 0 38 Bra 4 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 43 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 145 -Memory allocation - code portion : 9 +Memory allocation - code size : 9 ------------------------------------------------------------------ 0 4 Bra 4 4 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - 
compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra 4 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra 4 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 11 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra 4 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 11 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 151 -Memory allocation - code portion : 15 +Memory allocation - code size : 15 ------------------------------------------------------------------ 0 10 Bra 4 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 15 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 158 -Memory allocation - code portion : 22 +Memory allocation - code size : 22 ------------------------------------------------------------------ 0 17 Bra 4 Braposzero @@ -136,8 +126,7 @@ Memory allocation - code portion : 22 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 268 -Memory allocation - code portion : 132 +Memory allocation - code size : 132 ------------------------------------------------------------------ 0 127 Bra 4 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 132 ------------------------------------------------------------------ 
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 964 -Memory allocation - code portion : 828 +Memory allocation - code size : 828 ------------------------------------------------------------------ 0 823 Bra 4 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 828 ------------------------------------------------------------------ "\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 954 -Memory allocation - code portion : 818 +Memory allocation - code size : 818 ------------------------------------------------------------------ 0 813 Bra 4 
$<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 818 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 163 -Memory allocation - code portion : 27 +Memory allocation - code size : 27 ------------------------------------------------------------------ 0 22 Bra 4 14 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 27 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 171 -Memory allocation - code portion : 35 +Memory allocation - code size : 35 ------------------------------------------------------------------ 0 30 Bra 4 22 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 35 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 207 -Memory allocation - code portion : 43 +Memory allocation - code size : 43 +Memory allocation - data size : 28 ------------------------------------------------------------------ 0 38 Bra 4 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 43 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 203 -Memory allocation - code portion : 55 +Memory allocation - code size : 55 +Memory allocation - data size : 12 ------------------------------------------------------------------ 0 50 Bra 4 30 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 55 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ -Memory 
allocation - compiled block : 179 -Memory allocation - code portion : 39 +Memory allocation - code size : 39 +Memory allocation - data size : 4 ------------------------------------------------------------------ 0 34 Bra 4 8 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 39 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 173 -Memory allocation - code portion : 37 +Memory allocation - code size : 37 ------------------------------------------------------------------ 0 32 Bra 4 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 37 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 203 -Memory allocation - code portion : 67 +Memory allocation - code size : 67 ------------------------------------------------------------------ 0 62 Bra 4 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 67 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 7 Bra 4 \x{100} @@ -315,8 +297,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 8 Bra 4 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 13 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 9 Bra 4 \x{10000} @@ -335,8 +315,7 @@ Memory allocation - 
code portion : 14 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 9 Bra 4 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 9 Bra 4 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 14 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 7 Bra 4 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 7 Bra 4 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 7 Bra 4 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 
------------------------------------------------------------------ 0 7 Bra 4 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 ------------------------------------------------------------------ 0 15 Bra 4 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 16 Bra 4 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \xb4 Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 16 Bra 4 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x9e Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 7 Bra 4 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 186 -Memory allocation - code portion : 50 +Memory allocation - code size : 50 ------------------------------------------------------------------ 0 45 Bra 4 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 50 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory 
allocation - code size : 21 ------------------------------------------------------------------ 0 16 Bra 4 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 21 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 16 Bra 4 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 21 Failed: error 106 at offset 15: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 13 Bra 4 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 13 Bra 4 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 13 Bra 4 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 13 Bra 4 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 
189 -Memory allocation - code portion : 53 +Memory allocation - code size : 53 ------------------------------------------------------------------ 0 48 Bra - 4 [a-c\p{L}\x{660}] + 4 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 48 48 Ket 52 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 154 -Memory allocation - code portion : 18 +Memory allocation - code size : 18 ------------------------------------------------------------------ 0 13 Bra 4 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 18 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 187 -Memory allocation - code portion : 51 +Memory allocation - code size : 51 ------------------------------------------------------------------ 0 46 Bra - 4 [+\-\p{Nd}]++ + 4 [+\-0-9\p{Nd}]++ 46 46 Ket 50 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 163 -Memory allocation - code portion : 27 +Memory allocation - code size : 27 ------------------------------------------------------------------ 0 22 Bra 4 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 27 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 163 -Memory allocation - code portion : 27 +Memory allocation - code size : 27 ------------------------------------------------------------------ 0 22 Bra 4 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 27 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 156 -Memory allocation - code portion : 20 +Memory allocation - code size : 20 
------------------------------------------------------------------ 0 15 Bra 4 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 20 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 183 -Memory allocation - code portion : 47 +Memory allocation - code size : 47 ------------------------------------------------------------------ 0 42 Bra 4 34 CBra 1 10 Brazero 11 23 SCBra 2 17 9 Cond - 21 1 Cond ref + 21 1 Capture ref 24 0 26 4 Alt 30 13 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 47 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 173 -Memory allocation - code portion : 37 +Memory allocation - code size : 37 ------------------------------------------------------------------ 0 32 Bra 4 24 CBra 1 10 Brazero 11 9 SCond - 15 1 Cond ref + 15 1 Capture ref 18 0 20 4 Alt 24 13 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 37 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra 4 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 11 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra 4 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 11 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra 4 \x{aa} @@ -654,8 +606,7 @@ Memory 
allocation - code portion : 11 ------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 7 Bra 4 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 12 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra - 4 [^a] + 4 [^a] (not) 6 6 Ket 10 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra - 4 [^a] + 4 [^a] (not) 6 6 Ket 10 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 6 Bra - 4 [^\x{aa}] + 4 [^\x{aa}] (not) 6 6 Ket 10 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 148 -Memory allocation - code portion : 12 +Memory allocation - code size : 12 ------------------------------------------------------------------ 0 7 Bra - 4 [^\x{aa}] + 4 [^\x{aa}] (not) 7 7 Ket 11 End ------------------------------------------------------------------ @@ -912,36 +859,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 150 Bra 4 132 Once 8 11 Cond - 12 1 Cond ref + 12 1 Capture ref 15 140 Recurse 19 11 Ket 23 11 Cond - 27 1 Cond ref + 27 1 Capture ref 30 140 Recurse 34 11 Ket 38 11 Cond - 42 1 Cond ref + 42 1 Capture ref 45 140 
Recurse 49 11 Ket 53 11 Cond - 57 1 Cond ref + 57 1 Capture ref 60 140 Recurse 64 11 Ket 68 11 Cond - 72 1 Cond ref + 72 1 Capture ref 75 140 Recurse 79 11 Ket 83 11 Cond - 87 1 Cond ref + 87 1 Capture ref 90 140 Recurse 94 11 Ket 98 11 Cond -102 1 Cond ref +102 1 Capture ref 105 140 Recurse 109 11 Ket 113 19 SBraPos 117 11 SCond -121 1 Cond ref +121 1 Capture ref 124 140 Recurse 128 11 Ket 132 19 KetRpos @@ -964,7 +911,7 @@ Subject length lower bound = 0 0 76 Bra 4 58 Once 8 7 Cond - 12 1 Cond ref + 12 1 Capture ref 15 12 Alt 19 a 21 66 Recurse @@ -972,7 +919,7 @@ Subject length lower bound = 0 27 19 Ket 31 27 SBraPos 35 7 SCond - 39 1 Cond ref + 39 1 Capture ref 42 12 Alt 46 a 48 66 Recurse diff --git a/testdata/testoutput8-8-4 b/testdata/testoutput8-8-4 index 81cf0f7..42119f7 100644 --- a/testdata/testoutput8-8-4 +++ b/testdata/testoutput8-8-4 @@ -10,8 +10,7 @@ #pattern fullbincode,memory /((?i)b)/ -Memory allocation - compiled block : 161 -Memory allocation - code portion : 25 +Memory allocation - code size : 25 ------------------------------------------------------------------ 0 19 Bra 5 9 CBra 1 @@ -22,8 +21,7 @@ Memory allocation - code portion : 25 ------------------------------------------------------------------ /(?s)(.*X|^B)/ -Memory allocation - compiled block : 171 -Memory allocation - code portion : 35 +Memory allocation - code size : 35 ------------------------------------------------------------------ 0 29 Bra 5 11 CBra 1 @@ -38,8 +36,7 @@ Memory allocation - code portion : 35 ------------------------------------------------------------------ /(?s:.*X|^B)/ -Memory allocation - compiled block : 169 -Memory allocation - code portion : 33 +Memory allocation - code size : 33 ------------------------------------------------------------------ 0 27 Bra 5 9 Bra @@ -54,8 +51,7 @@ Memory allocation - code portion : 33 ------------------------------------------------------------------ /^[[:alnum:]]/ -Memory allocation - compiled block : 181 -Memory allocation 
- code portion : 45 +Memory allocation - code size : 45 ------------------------------------------------------------------ 0 39 Bra 5 ^ @@ -65,8 +61,7 @@ Memory allocation - code portion : 45 ------------------------------------------------------------------ /#/Ix -Memory allocation - compiled block : 147 -Memory allocation - code portion : 11 +Memory allocation - code size : 11 ------------------------------------------------------------------ 0 5 Bra 5 5 Ket @@ -78,8 +73,7 @@ Options: extended Subject length lower bound = 0 /a#/Ix -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra 5 a @@ -92,8 +86,7 @@ First code unit = 'a' Subject length lower bound = 1 /x?+/ -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra 5 x?+ @@ -102,8 +95,7 @@ Memory allocation - code portion : 13 ------------------------------------------------------------------ /x++/ -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra 5 x++ @@ -112,8 +104,7 @@ Memory allocation - code portion : 13 ------------------------------------------------------------------ /x{1,3}+/ -Memory allocation - compiled block : 153 -Memory allocation - code portion : 17 +Memory allocation - code size : 17 ------------------------------------------------------------------ 0 11 Bra 5 x @@ -123,8 +114,7 @@ Memory allocation - code portion : 17 ------------------------------------------------------------------ /(x)*+/ -Memory allocation - compiled block : 162 -Memory allocation - code portion : 26 +Memory allocation - code size : 26 ------------------------------------------------------------------ 0 20 Bra 5 
Braposzero @@ -136,8 +126,7 @@ Memory allocation - code portion : 26 ------------------------------------------------------------------ /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ -Memory allocation - compiled block : 280 -Memory allocation - code portion : 144 +Memory allocation - code size : 144 ------------------------------------------------------------------ 0 138 Bra 5 ^ @@ -160,8 +149,7 @@ Memory allocation - code portion : 144 ------------------------------------------------------------------ "8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 966 -Memory allocation - code portion : 830 +Memory allocation - code size : 830 ------------------------------------------------------------------ 0 824 Bra 5 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -171,8 +159,7 @@ Memory allocation - code portion : 830 ------------------------------------------------------------------ 
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" -Memory allocation - compiled block : 956 -Memory allocation - code portion : 820 +Memory allocation - code size : 820 ------------------------------------------------------------------ 0 814 Bra 5 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X @@ -182,8 +169,7 @@ Memory allocation - code portion : 820 ------------------------------------------------------------------ /(a(?1)b)/ -Memory allocation - compiled block : 168 -Memory allocation - code portion : 32 +Memory allocation - code size : 32 ------------------------------------------------------------------ 0 26 Bra 5 16 CBra 1 @@ -196,8 +182,7 @@ Memory allocation - code portion : 32 ------------------------------------------------------------------ /(a(?1)+b)/ -Memory allocation - compiled block : 178 -Memory allocation - code portion : 42 +Memory allocation - code size : 42 ------------------------------------------------------------------ 0 36 Bra 5 26 CBra 1 @@ -212,8 +197,8 @@ Memory allocation - code portion : 42 ------------------------------------------------------------------ /a(?Pb|c)d(?Pe)/ -Memory allocation - compiled block : 214 -Memory allocation - code portion 
: 50 +Memory allocation - code size : 50 +Memory allocation - data size : 28 ------------------------------------------------------------------ 0 44 Bra 5 a @@ -231,8 +216,8 @@ Memory allocation - code portion : 50 ------------------------------------------------------------------ /(?:a(?Pc(?Pd)))(?Pa)/ -Memory allocation - compiled block : 213 -Memory allocation - code portion : 65 +Memory allocation - code size : 65 +Memory allocation - data size : 12 ------------------------------------------------------------------ 0 59 Bra 5 35 Bra @@ -252,8 +237,8 @@ Memory allocation - code portion : 65 ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/ -Memory allocation - compiled block : 184 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 +Memory allocation - data size : 4 ------------------------------------------------------------------ 0 38 Bra 5 9 CBra 1 @@ -271,8 +256,7 @@ Memory allocation - code portion : 44 ------------------------------------------------------------------ /abc(?C255)de(?C)f/ -Memory allocation - compiled block : 179 -Memory allocation - code portion : 43 +Memory allocation - code size : 43 ------------------------------------------------------------------ 0 37 Bra 5 abc @@ -285,8 +269,7 @@ Memory allocation - code portion : 43 ------------------------------------------------------------------ /abcde/auto_callout -Memory allocation - compiled block : 217 -Memory allocation - code portion : 81 +Memory allocation - code size : 81 ------------------------------------------------------------------ 0 75 Bra 5 Callout 255 0 1 @@ -305,8 +288,7 @@ Memory allocation - code portion : 81 ------------------------------------------------------------------ /\x{100}/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 8 Bra 5 \x{100} @@ -315,8 +297,7 @@ 
Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x{1000}/utf -Memory allocation - compiled block : 151 -Memory allocation - code portion : 15 +Memory allocation - code size : 15 ------------------------------------------------------------------ 0 9 Bra 5 \x{1000} @@ -325,8 +306,7 @@ Memory allocation - code portion : 15 ------------------------------------------------------------------ /\x{10000}/utf -Memory allocation - compiled block : 152 -Memory allocation - code portion : 16 +Memory allocation - code size : 16 ------------------------------------------------------------------ 0 10 Bra 5 \x{10000} @@ -335,8 +315,7 @@ Memory allocation - code portion : 16 ------------------------------------------------------------------ /\x{100000}/utf -Memory allocation - compiled block : 152 -Memory allocation - code portion : 16 +Memory allocation - code size : 16 ------------------------------------------------------------------ 0 10 Bra 5 \x{100000} @@ -345,8 +324,7 @@ Memory allocation - code portion : 16 ------------------------------------------------------------------ /\x{10ffff}/utf -Memory allocation - compiled block : 152 -Memory allocation - code portion : 16 +Memory allocation - code size : 16 ------------------------------------------------------------------ 0 10 Bra 5 \x{10ffff} @@ -358,8 +336,7 @@ Memory allocation - code portion : 16 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /[\x{ff}]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 8 Bra 5 \x{ff} @@ -368,8 +345,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[\x{100}]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 
------------------------------------------------------------------ 0 8 Bra 5 \x{100} @@ -378,8 +354,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x80/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 8 Bra 5 \x{80} @@ -388,8 +363,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\xff/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 8 Bra 5 \x{ff} @@ -398,8 +372,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /\x{0041}\x{2262}\x{0391}\x{002e}/I,utf -Memory allocation - compiled block : 158 -Memory allocation - code portion : 22 +Memory allocation - code size : 22 ------------------------------------------------------------------ 0 16 Bra 5 A\x{2262}\x{391}. @@ -413,8 +386,7 @@ Last code unit = '.' 
Subject length lower bound = 4 /\x{D55c}\x{ad6d}\x{C5B4}/I,utf -Memory allocation - compiled block : 159 -Memory allocation - code portion : 23 +Memory allocation - code size : 23 ------------------------------------------------------------------ 0 17 Bra 5 \x{d55c}\x{ad6d}\x{c5b4} @@ -428,8 +400,7 @@ Last code unit = \xb4 Subject length lower bound = 3 /\x{65e5}\x{672c}\x{8a9e}/I,utf -Memory allocation - compiled block : 159 -Memory allocation - code portion : 23 +Memory allocation - code size : 23 ------------------------------------------------------------------ 0 17 Bra 5 \x{65e5}\x{672c}\x{8a9e} @@ -443,8 +414,7 @@ Last code unit = \x9e Subject length lower bound = 3 /[\x{100}]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 8 Bra 5 \x{100} @@ -453,8 +423,7 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[Z\x{100}]/utf -Memory allocation - compiled block : 189 -Memory allocation - code portion : 53 +Memory allocation - code size : 53 ------------------------------------------------------------------ 0 47 Bra 5 [Z\x{100}] @@ -463,8 +432,7 @@ Memory allocation - code portion : 53 ------------------------------------------------------------------ /^[\x{100}\E-\Q\E\x{150}]/utf -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 18 Bra 5 ^ @@ -474,8 +442,7 @@ Memory allocation - code portion : 24 ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/utf -Memory allocation - compiled block : 160 -Memory allocation - code portion : 24 +Memory allocation - code size : 24 ------------------------------------------------------------------ 0 18 Bra 5 ^ @@ -488,8 +455,7 @@ Memory allocation - code portion : 24 
Failed: error 106 at offset 15: missing terminating ] for character class /[\p{L}]/ -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 15 Bra 5 [\p{L}] @@ -498,8 +464,7 @@ Memory allocation - code portion : 21 ------------------------------------------------------------------ /[\p{^L}]/ -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 15 Bra 5 [\P{L}] @@ -508,8 +473,7 @@ Memory allocation - code portion : 21 ------------------------------------------------------------------ /[\P{L}]/ -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 15 Bra 5 [\P{L}] @@ -518,8 +482,7 @@ Memory allocation - code portion : 21 ------------------------------------------------------------------ /[\P{^L}]/ -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 ------------------------------------------------------------------ 0 15 Bra 5 [\p{L}] @@ -528,18 +491,16 @@ Memory allocation - code portion : 21 ------------------------------------------------------------------ /[abc\p{L}\x{0660}]/utf -Memory allocation - compiled block : 192 -Memory allocation - code portion : 56 +Memory allocation - code size : 56 ------------------------------------------------------------------ 0 50 Bra - 5 [a-c\p{L}\x{660}] + 5 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff\p{L}\x{660}] 50 50 Ket 55 End ------------------------------------------------------------------ /[\p{Nd}]/utf -Memory allocation - compiled block : 157 -Memory allocation - code portion : 21 +Memory allocation - code size : 21 
------------------------------------------------------------------ 0 15 Bra 5 [\p{Nd}] @@ -548,18 +509,16 @@ Memory allocation - code portion : 21 ------------------------------------------------------------------ /[\p{Nd}+-]+/utf -Memory allocation - compiled block : 190 -Memory allocation - code portion : 54 +Memory allocation - code size : 54 ------------------------------------------------------------------ 0 48 Bra - 5 [+\-\p{Nd}]++ + 5 [+\-0-9\p{Nd}]++ 48 48 Ket 53 End ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf -Memory allocation - compiled block : 165 -Memory allocation - code portion : 29 +Memory allocation - code size : 29 ------------------------------------------------------------------ 0 23 Bra 5 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -568,8 +527,7 @@ Memory allocation - code portion : 29 ------------------------------------------------------------------ /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf -Memory allocation - compiled block : 165 -Memory allocation - code portion : 29 +Memory allocation - code size : 29 ------------------------------------------------------------------ 0 23 Bra 5 A\x{391}\x{10427}\x{ff3a}\x{1fb0} @@ -578,8 +536,7 @@ Memory allocation - code portion : 29 ------------------------------------------------------------------ /[\x{105}-\x{109}]/i,utf -Memory allocation - compiled block : 159 -Memory allocation - code portion : 23 +Memory allocation - code size : 23 ------------------------------------------------------------------ 0 17 Bra 5 [\x{104}-\x{109}] @@ -588,15 +545,14 @@ Memory allocation - code portion : 23 ------------------------------------------------------------------ /( ( (?(1)0|) )* )/x -Memory allocation - compiled block : 192 -Memory allocation - code portion : 56 +Memory allocation - code size : 56 ------------------------------------------------------------------ 0 50 Bra 5 40 CBra 1 12 Brazero 13 27 SCBra 2 20 10 Cond - 25 1 Cond ref + 25 1 Capture ref 
28 0 30 5 Alt 35 15 Ket @@ -607,14 +563,13 @@ Memory allocation - code portion : 56 ------------------------------------------------------------------ /( (?(1)0|)* )/x -Memory allocation - compiled block : 180 -Memory allocation - code portion : 44 +Memory allocation - code size : 44 ------------------------------------------------------------------ 0 38 Bra 5 28 CBra 1 12 Brazero 13 10 SCond - 18 1 Cond ref + 18 1 Capture ref 21 0 23 5 Alt 28 15 KetRmax @@ -624,8 +579,7 @@ Memory allocation - code portion : 44 ------------------------------------------------------------------ /[a]/ -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra 5 a @@ -634,8 +588,7 @@ Memory allocation - code portion : 13 ------------------------------------------------------------------ /[a]/utf -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra 5 a @@ -644,8 +597,7 @@ Memory allocation - code portion : 13 ------------------------------------------------------------------ /[\xaa]/ -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra 5 \x{aa} @@ -654,8 +606,7 @@ Memory allocation - code portion : 13 ------------------------------------------------------------------ /[\xaa]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 8 Bra 5 \x{aa} @@ -664,41 +615,37 @@ Memory allocation - code portion : 14 ------------------------------------------------------------------ /[^a]/ -Memory allocation - compiled block : 149 -Memory allocation - code portion 
: 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra - 5 [^a] + 5 [^a] (not) 7 7 Ket 12 End ------------------------------------------------------------------ /[^a]/utf -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra - 5 [^a] + 5 [^a] (not) 7 7 Ket 12 End ------------------------------------------------------------------ /[^\xaa]/ -Memory allocation - compiled block : 149 -Memory allocation - code portion : 13 +Memory allocation - code size : 13 ------------------------------------------------------------------ 0 7 Bra - 5 [^\x{aa}] + 5 [^\x{aa}] (not) 7 7 Ket 12 End ------------------------------------------------------------------ /[^\xaa]/utf -Memory allocation - compiled block : 150 -Memory allocation - code portion : 14 +Memory allocation - code size : 14 ------------------------------------------------------------------ 0 8 Bra - 5 [^\x{aa}] + 5 [^\x{aa}] (not) 8 8 Ket 13 End ------------------------------------------------------------------ @@ -912,36 +859,36 @@ Failed: error 186 at offset 12820: regular expression is too complicated 0 181 Bra 5 159 Once 10 13 Cond - 15 1 Cond ref + 15 1 Capture ref 18 169 Recurse 23 13 Ket 28 13 Cond - 33 1 Cond ref + 33 1 Capture ref 36 169 Recurse 41 13 Ket 46 13 Cond - 51 1 Cond ref + 51 1 Capture ref 54 169 Recurse 59 13 Ket 64 13 Cond - 69 1 Cond ref + 69 1 Capture ref 72 169 Recurse 77 13 Ket 82 13 Cond - 87 1 Cond ref + 87 1 Capture ref 90 169 Recurse 95 13 Ket 100 13 Cond -105 1 Cond ref +105 1 Capture ref 108 169 Recurse 113 13 Ket 118 13 Cond -123 1 Cond ref +123 1 Capture ref 126 169 Recurse 131 13 Ket 136 23 SBraPos 141 13 SCond -146 1 Cond ref +146 1 Capture ref 149 169 Recurse 154 13 Ket 159 23 KetRpos @@ -964,7 +911,7 @@ Subject length lower bound = 0 0 91 Bra 5 69 Once 10 8 Cond - 15 1 Cond ref + 15 1 
Capture ref 18 14 Alt 23 a 25 79 Recurse @@ -972,7 +919,7 @@ Subject length lower bound = 0 32 22 Ket 37 32 SBraPos 42 8 SCond - 47 1 Cond ref + 47 1 Capture ref 50 14 Alt 55 a 57 79 Recurse diff --git a/testdata/testoutput9 b/testdata/testoutput9 index 1ec4317..7845dc1 100644 --- a/testdata/testoutput9 +++ b/testdata/testoutput9 @@ -4,17 +4,38 @@ #forbid_utf #newline_default lf any anycrlf -/ab/ -\= Expect error message (too big char) and no match - A\x{123}B +/a\xc4\xa3b/ + a\N{U+123}b + 0: a\xc4\xa3b +\= Expect no match # error message (too big char) + a\x{0123}b ** Character \x{123} is greater than 255 and UTF-8 mode is not enabled. ** Truncation will probably give the wrong result. No match - A\o{443}B + a\o{00443}b ** Character \x{123} is greater than 255 and UTF-8 mode is not enabled. ** Truncation will probably give the wrong result. No match - + a\443b +** Character \x{123} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +/fd bf bf bf bf bf/I,hex +Capture group count = 0 +First code unit = \xfd +Last code unit = \xbf +Subject length lower bound = 6 +\= Expect warning + \N{U+7fffffff} +** Warning: character \N{U+7fffffff} is greater than 0x10ffff and should not be encoded as UTF-8 + 0: \xfd\xbf\xbf\xbf\xbf\xbf +\= Expect no match # error message (too big char) + \x{7fffffff} +** Character \x{7fffffff} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +No match + /\x{100}/I Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large @@ -218,14 +239,14 @@ Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too Capture group count = 0 Contains explicit CR or LF match Options: extended -Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 - 9 = ? 
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e - f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f +Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 + 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e + f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f Subject length lower bound = 3 /\h/I Capture group count = 0 -Starting code units: \x09 \x20 \xa0 +Starting code units: \x09 \x20 \xa0 Subject length lower bound = 1 /\H/I @@ -234,7 +255,7 @@ Subject length lower bound = 1 /\v/I Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d \x85 +Starting code units: \x0a \x0b \x0c \x0d \x85 Subject length lower bound = 1 /\V/I @@ -243,7 +264,7 @@ Subject length lower bound = 1 /\R/I Capture group count = 0 -Starting code units: \x0a \x0b \x0c \x0d \x85 +Starting code units: \x0a \x0b \x0c \x0d \x85 Subject length lower bound = 1 /[\h]/B @@ -285,7 +306,7 @@ Subject length lower bound = 1 /[^\h]/B ------------------------------------------------------------------ Bra - [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg) + [^\x09 \xa0] Ket End ------------------------------------------------------------------ @@ -336,8 +357,8 @@ Failed: error 177 at offset 7: character code point value in \u.... sequence is /[^\x00-a]{12,}[^b-\xff]*/B ------------------------------------------------------------------ Bra - [b-\xff] (neg){12,}+ - [\x00-a] (neg)*+ + [^\x00-a]{12,}+ + [^b-\xff]*+ Ket End ------------------------------------------------------------------ @@ -345,16 +366,16 @@ Failed: error 177 at offset 7: character code point value in \u.... 
sequence is /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B ------------------------------------------------------------------ Bra - [\x00-\x08\x0e-\x1f!-\xff] (neg)*+ + [^\x09-\x0d ]*+ \s* [0-9A-Z_a-z]++ \W+ - [\x00-/:-\xff] (neg)*+ + [^0-9]*+ \d 0 - [\x00-/:-@[-^`{-\xff] (neg){4,6}+ + [^0-9A-Z_a-z]{4,6}+ \w* A Ket @@ -371,4 +392,17 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode A\x{1b6}6666666 +# Should cause an error +/abc/substitute_extended,replace=>\777< + abc +Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string + +# Should cause an error +/abc/substitute_extended,replace=>\o{012345}< + abc +Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string + +/i/turkish_casing +Failed: error 204 at offset 0: PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode + # End of testinput9 diff --git a/testdata/wintestinput3 b/testdata/wintestinput3 index 8d8017a..15580cf 100644 --- a/testdata/wintestinput3 +++ b/testdata/wintestinput3 @@ -1,5 +1,5 @@ # This set of tests checks local-specific features, using the "fr_FR" locale. -# It is not Perl-compatible. When run via RunTest, the locale is edited to +# It is almost Perl-compatible. When run via RunTest, the locale is edited to # be whichever of "fr_FR", "french", or "fr" is found to exist. 
There is # different version of this file called wintestinput3 for use on Windows, # where the locale is called "french" and the tests are run using @@ -8,35 +8,31 @@ #forbid_utf /^[\w]+/ - *** Failers +\= Expect no match École /^[\w]+/locale=french École -/^[\w]+/ - *** Failers - École - /^[\W]+/ École /^[\W]+/locale=french - *** Failers +\= Expect no match École /[\b]/ \b - *** Failers +\= Expect no match a /[\b]/locale=french \b - *** Failers +\= Expect no match a /^\w+/ - *** Failers +\= Expect no match École /^\w+/locale=french @@ -46,12 +42,12 @@ École /(.+)\b(.+)/locale=french - *** Failers +\= Expect no match École /École/i École - *** Failers +\= Expect no match école /École/i,locale=french @@ -72,9 +68,22 @@ /^[\xc8-\xc9]/ École - *** Failers +\= Expect no match école +/\xb5/i + µ +\= Expect no match + \x9c + +/ÿ/i + \xff +\= Expect no match + y + +/(.)\1/i + \xfe\xde + /\W+/ >>>\xaa<<< >>>\xba<<< @@ -101,4 +110,4 @@ /[[:alpha:]][[:lower:]][[:upper:]]/IB -# End of testinput3 +# End of wintestinput3 diff --git a/testdata/wintestoutput3 b/testdata/wintestoutput3 index b1894b6..171b7fb 100644 --- a/testdata/wintestoutput3 +++ b/testdata/wintestoutput3 @@ -1,5 +1,5 @@ # This set of tests checks local-specific features, using the "fr_FR" locale. -# It is not Perl-compatible. When run via RunTest, the locale is edited to +# It is almost Perl-compatible. When run via RunTest, the locale is edited to # be whichever of "fr_FR", "french", or "fr" is found to exist. 
There is # different version of this file called wintestinput3 for use on Windows, # where the locale is called "french" and the tests are run using @@ -8,8 +8,7 @@ #forbid_utf /^[\w]+/ - *** Failers -No match +\= Expect no match École No match @@ -17,41 +16,31 @@ No match École 0: École -/^[\w]+/ - *** Failers -No match - École -No match - /^[\W]+/ École 0: \xc9 /^[\W]+/locale=french - *** Failers - 0: *** +\= Expect no match École No match /[\b]/ \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /[\b]/locale=french \b 0: \x08 - *** Failers -No match +\= Expect no match a No match /^\w+/ - *** Failers -No match +\= Expect no match École No match @@ -66,18 +55,14 @@ No match 2: cole /(.+)\b(.+)/locale=french - *** Failers - 0: *** Failers - 1: *** - 2: Failers +\= Expect no match École No match /École/i École 0: \xc9cole - *** Failers -No match +\= Expect no match école No match @@ -89,17 +74,17 @@ No match /\w/I Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z Subject length lower bound = 1 /\w/I,locale=french Capture group count = 0 -Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P - Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z - ƒ Š Œ Ž š œ ž Ÿ ª ² ³ µ ¹ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö - Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý - þ ÿ +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ƒ Š Œ Ž š œ ž Ÿ ª ² ³ µ ¹ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö + Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý + þ ÿ Subject length lower bound = 1 # All 
remaining tests are in the french locale, so set the default. @@ -115,11 +100,29 @@ Subject length lower bound = 1 /^[\xc8-\xc9]/ École 0: É - *** Failers -No match +\= Expect no match école No match +/\xb5/i + µ + 0: µ +\= Expect no match + \x9c +No match + +/ÿ/i + \xff + 0: ÿ +\= Expect no match + y +No match + +/(.)\1/i + \xfe\xde + 0: þÞ + 1: þ + /\W+/ >>>\xaa<<< 0: >>> @@ -166,10 +169,10 @@ No match End ------------------------------------------------------------------ Capture group count = 0 -Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z - a b c d e f g h i j k l m n o p q r s t u v w x y z ƒ Š Œ Ž š œ ž Ÿ ª µ º - À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å - æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ƒ Š Œ Ž š œ ž Ÿ ª µ º + À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å + æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ Subject length lower bound = 3 -# End of testinput3 +# End of wintestinput3 diff --git a/vms/configure.com b/vms/configure.com index b50365d..831f38f 100644 --- a/vms/configure.com +++ b/vms/configure.com @@ -496,12 +496,17 @@ sure both macros are undefined; an emulation function will then be used. */ /* Define this if your compiler supports __attribute__((uninitialized)) */ #undef HAVE_ATTRIBUTE_UNINITIALIZED +/* Define to 1 if you have the header file. */ +#define HAVE_ASSERT_H 1 + /* Define to 1 if you have the 'bcopy' function. */ #define HAVE_BCOPY 1 /* Define this if your compiler provides __builtin_mul_overflow() */ #undef HAVE_BUILTIN_MUL_OVERFLOW +/* Define this if your compiler provides __builtin_unreachable() */ +#undef HAVE_BUILTIN_UNREACHABLE /* Define to 1 if you have the header file. 
*/ #define HAVE_DIRENT_H 1 @@ -1032,6 +1037,9 @@ PCRE2_CHKDINT.OBJ : PCRE2_CHKDINT.C PCRE2_COMPILE.OBJ : PCRE2_COMPILE.C $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) +PCRE2_COMPILE.OBJ : PCRE2_COMPILE_CLASS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + PCRE2_CONFIG.OBJ : PCRE2_CONFIG.C $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) -- 2.30.2